/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <net/sock.h>

#include "vmci_transport_notify.h"

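/* Shorthand for one field of the packet-based notification state that the
 * transport keeps per socket.
 */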
#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)

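/* Decide whether the peer needs a READ notification: true when the peer has
 * announced that it is waiting to write and the free space in our consume
 * queue exceeds the current notify limit.
 */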
static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	bool retval;
	u64 notify_limit;

	if (!PKT_FIELD(vsk, peer_waiting_write))
		return false;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	/* When the sender blocks, we take that as a sign that the sender is
	 * faster than the receiver. To reduce the transmit rate of the sender,
	 * we delay the sending of the read notification by decreasing the
	 * write_notify_window. The notification is delayed until the number of
	 * bytes used in the queue drops below the write_notify_window.
	 */

	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
		} else {
			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
			if (PKT_FIELD(vsk, write_notify_window) <
			    PKT_FIELD(vsk, write_notify_min_window))
				PKT_FIELD(vsk, write_notify_window) =
				    PKT_FIELD(vsk, write_notify_min_window);
		}
	}
	notify_limit = vmci_trans(vsk)->consume_size -
		PKT_FIELD(vsk, write_notify_window);
#else
	notify_limit = 0;
#endif

	/* For now we ignore the wait information and just see if the free
	 * space exceeds the notify limit.  Note that improving this function
	 * to be more intelligent will not require a protocol change and will
	 * retain compatibility between endpoints with mixed versions of this
	 * function.
	 *
	 * The notify_limit is used to delay notifications in the case where
	 * flow control is enabled. Below, the test is expressed in terms of
	 * free space in the queue: if free_space > ConsumeSize -
	 * write_notify_window then notify.  An alternate way of expressing
	 * this is to rewrite the expression in terms of the data ready in the
	 * receive queue: if write_notify_window > bufferReady then notify, as
	 * free_space == ConsumeSize - bufferReady.
	 */
	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
		notify_limit;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (retval) {
		/* Once we notify the peer, we reset the detected flag so the
		 * next wait will again cause a decrease in the window size.
		 */
		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	}
#endif
	return retval;
#else
	return true;
#endif
}

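/* Decide whether the peer needs a WROTE notification: true when the peer has
 * announced that it is waiting to read and there is data in our produce
 * queue for it to consume.
 */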
static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	if (!PKT_FIELD(vsk, peer_waiting_read))
		return false;

	/* For now we ignore the wait information and just see if there is any
	 * data for our peer to read.  Note that improving this function to be
	 * more intelligent will not require a protocol change and will retain
	 * compatibility between endpoints with mixed versions of this
	 * function.
	 */
	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
#else
	return true;
#endif
}

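/* Handle an incoming WAITING_READ control packet: record that the peer is
 * waiting to read, and send a WROTE notification immediately if data is
 * already available.
 */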
static void
vmci_transport_handle_waiting_read(struct sock *sk,
				   struct vmci_transport_packet *pkt,
				   bool bottom_half,
				   struct sockaddr_vm *dst,
				   struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_read) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));

	if (vmci_transport_notify_waiting_read(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_wrote(sk) > 0;

		if (sent)
			PKT_FIELD(vsk, peer_waiting_read) = false;
	}
#endif
}

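/* Handle an incoming WAITING_WRITE control packet: record that the peer is
 * waiting to write, and send a READ notification immediately if enough queue
 * space is already available.
 */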
static void
vmci_transport_handle_waiting_write(struct sock *sk,
				    struct vmci_transport_packet *pkt,
				    bool bottom_half,
				    struct sockaddr_vm *dst,
				    struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);

	PKT_FIELD(vsk, peer_waiting_write) = true;
	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));

	if (vmci_transport_notify_waiting_write(vsk)) {
		bool sent;

		if (bottom_half)
			sent = vmci_transport_send_read_bh(dst, src) > 0;
		else
			sent = vmci_transport_send_read(sk) > 0;

		if (sent)
			PKT_FIELD(vsk, peer_waiting_write) = false;
	}
#endif
}

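/* Handle an incoming READ control packet: the peer consumed data from its
 * queue, so wake up any writers blocked on this socket.
 */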
static void
vmci_transport_handle_read(struct sock *sk,
			   struct vmci_transport_packet *pkt,
			   bool bottom_half,
			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;

	vsk = vsock_sk(sk);
	PKT_FIELD(vsk, sent_waiting_write) = false;
#endif

	sk->sk_write_space(sk);
}

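/* Tell the peer we are waiting to read "room_needed" bytes by sending a
 * WAITING_READ packet that identifies the queue offset and generation at
 * which enough data will be available.  The write notify window is also
 * grown here: a reader about to block implies the receiver is keeping up
 * with the sender, so flow control can be relaxed.
 */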
static bool send_waiting_read(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_read))
		return true;

	if (PKT_FIELD(vsk, write_notify_window) <
			vmci_trans(vsk)->consume_size)
		PKT_FIELD(vsk, write_notify_window) =
		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
			vmci_trans(vsk)->consume_size);

	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->consume_size - head;
	if (room_needed >= room_left) {
		/* Wraps around to the next generation. */
		waiting_info.offset = room_needed - room_left;
		waiting_info.generation =
		    PKT_FIELD(vsk, consume_q_generation) + 1;
	} else {
		waiting_info.offset = head + room_needed;
		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
	}

	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_read) = true;

	return ret;
#else
	return true;
#endif
}

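/* Tell the peer we are waiting for queue space by sending a WAITING_WRITE
 * packet that identifies the queue offset and generation at which
 * "room_needed" bytes will fit.
 */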
static bool send_waiting_write(struct sock *sk, u64 room_needed)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk;
	struct vmci_transport_waiting_info waiting_info;
	u64 tail;
	u64 head;
	u64 room_left;
	bool ret;

	vsk = vsock_sk(sk);

	if (PKT_FIELD(vsk, sent_waiting_write))
		return true;

	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
	room_left = vmci_trans(vsk)->produce_size - tail;
	if (room_needed + 1 >= room_left) {
		/* Wraps around to current generation. */
		waiting_info.offset = room_needed + 1 - room_left;
		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
	} else {
		waiting_info.offset = tail + room_needed + 1;
		waiting_info.generation =
		    PKT_FIELD(vsk, produce_q_generation) - 1;
	}

	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
	if (ret)
		PKT_FIELD(vsk, sent_waiting_write) = true;

	return ret;
#else
	return true;
#endif
}

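/* Send a READ notification to the peer if it is waiting to write, retrying
 * a bounded number of times if the datagram send fails.
 */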
static int vmci_transport_send_read_notification(struct sock *sk)
{
	struct vsock_sock *vsk;
	bool sent_read;
	unsigned int retries;
	int err;

	vsk = vsock_sk(sk);
	sent_read = false;
	retries = 0;
	err = 0;

	if (vmci_transport_notify_waiting_write(vsk)) {
		/* Notify the peer that we have read, retrying the send on
		 * failure up to our maximum value.  XXX For now we just log
		 * the failure, but later we should schedule a work item to
		 * handle the resend until it succeeds.  That would require
		 * keeping track of work items in the vsk and cleaning them up
		 * upon socket close.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_read &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_read(sk);
			if (err >= 0)
				sent_read = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
			pr_err("%p unable to send read notify to peer\n", sk);
		else
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			PKT_FIELD(vsk, peer_waiting_write) = false;
#endif
	}
	return err;
}

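/* Handle an incoming WROTE control packet: the peer wrote data into its
 * queue, so wake up any readers blocked on this socket.
 */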
static void
vmci_transport_handle_wrote(struct sock *sk,
			    struct vmci_transport_packet *pkt,
			    bool bottom_half,
			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
{
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, sent_waiting_read) = false;
#endif
	sk->sk_data_ready(sk);
}

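/* Initialize the notification state to its defaults; the notify windows
 * start at one page and are tuned as the connection runs.
 */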
static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
	PKT_FIELD(vsk, peer_waiting_read) = false;
	PKT_FIELD(vsk, peer_waiting_write) = false;
	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
	PKT_FIELD(vsk, sent_waiting_read) = false;
	PKT_FIELD(vsk, sent_waiting_write) = false;
	PKT_FIELD(vsk, produce_q_generation) = 0;
	PKT_FIELD(vsk, consume_q_generation) = 0;

	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
}

static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
{
}

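/* Poll for readable data.  If the queue is empty on a connected socket, ask
 * the peer to notify us when it writes.
 */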
static int
vmci_transport_notify_pkt_poll_in(struct sock *sk,
				  size_t target, bool *data_ready_now)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (vsock_stream_has_data(vsk)) {
		*data_ready_now = true;
	} else {
		/* We can't read right now because there is nothing in the
		 * queue. Ask for notifications when there is something to
		 * read.
		 */
		if (sk->sk_state == SS_CONNECTED) {
			if (!send_waiting_read(sk, 1))
				return -1;
		}
		*data_ready_now = false;
	}

	return 0;
}

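/* Poll for writable space.  If the produce queue is completely full, ask the
 * peer to notify us when it reads.
 */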
static int
vmci_transport_notify_pkt_poll_out(struct sock *sk,
				   size_t target, bool *space_avail_now)
{
	s64 produce_q_free_space;
	struct vsock_sock *vsk = vsock_sk(sk);

	produce_q_free_space = vsock_stream_has_space(vsk);
	if (produce_q_free_space > 0) {
		*space_avail_now = true;
		return 0;
	} else if (produce_q_free_space == 0) {
		/* This is a connected socket but we can't currently send data.
		 * Notify the peer that we are waiting if the queue is full. We
		 * only send a waiting write if the queue is full because
		 * otherwise we end up in an infinite WAITING_WRITE, READ,
		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
		 * notification as a socket error, passing that back through
		 * the mask.
		 */
		if (!send_waiting_write(sk, 1))
			return -1;

		*space_avail_now = false;
	}

	return 0;
}

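/* Prepare to receive: reset the cached queue indexes and, when flow control
 * is enabled, make sure the write notify window covers at least "target"
 * bytes so the sender will wake a receiver blocked on that amount.
 */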
static int
vmci_transport_notify_pkt_recv_init(
			struct sock *sk,
			size_t target,
			struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	data->notify_on_block = false;

	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
		if (PKT_FIELD(vsk, write_notify_window) <
		    PKT_FIELD(vsk, write_notify_min_window)) {
			/* If the current window is smaller than the new
			 * minimal window size, we need to reevaluate whether
			 * we need to notify the sender. If the number of
			 * ready bytes is smaller than the new window, we
			 * need to send a notification to the sender before
			 * we block.
			 */
			PKT_FIELD(vsk, write_notify_window) =
			    PKT_FIELD(vsk, write_notify_min_window);
			data->notify_on_block = true;
		}
	}
#endif
#endif

	return 0;
}

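/* About to block waiting for data: announce the waiting read to the peer,
 * and send the read notification that recv_init flagged as needed before
 * blocking.
 */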
static int
vmci_transport_notify_pkt_recv_pre_block(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	int err = 0;

	/* Notify our peer that we are waiting for data to read. */
	if (!send_waiting_read(sk, target)) {
		err = -EHOSTUNREACH;
		return err;
	}
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
	if (data->notify_on_block) {
		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;

		data->notify_on_block = false;
	}
#endif

	return err;
}

static int
vmci_transport_notify_pkt_recv_pre_dequeue(
				struct sock *sk,
				size_t target,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* Now consume up to len bytes from the queue.  Note that since we have
	 * the socket locked we should copy at least ready bytes.
	 */
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}

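/* Data was dequeued: bump the consume queue generation on wrap-around and
 * notify the peer that we have read.
 */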
static int
vmci_transport_notify_pkt_recv_post_dequeue(
				struct sock *sk,
				size_t target,
				ssize_t copied,
				bool data_read,
				struct vmci_transport_recv_notify_data *data)
{
	struct vsock_sock *vsk;
	int err;

	vsk = vsock_sk(sk);
	err = 0;

	if (data_read) {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
		/* Detect a wrap-around to maintain queue generation.  Note
		 * that this is safe since we hold the socket lock across the
		 * two queue pair operations.
		 */
		if (copied >=
			vmci_trans(vsk)->consume_size - data->consume_head)
			PKT_FIELD(vsk, consume_q_generation)++;
#endif

		err = vmci_transport_send_read_notification(sk);
		if (err < 0)
			return err;
	}
	return err;
}

static int
vmci_transport_notify_pkt_send_init(
			struct sock *sk,
			struct vmci_transport_send_notify_data *data)
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
	data->consume_head = 0;
	data->produce_tail = 0;
#endif

	return 0;
}

static int
vmci_transport_notify_pkt_send_pre_block(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	/* Notify our peer that we are waiting for room to write. */
	if (!send_waiting_write(sk, 1))
		return -EHOSTUNREACH;

	return 0;
}

static int
vmci_transport_notify_pkt_send_pre_enqueue(
				struct sock *sk,
				struct vmci_transport_send_notify_data *data)
{
	struct vsock_sock *vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
				       &data->produce_tail,
				       &data->consume_head);
#endif

	return 0;
}

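/* Data was enqueued: bump the produce queue generation on wrap-around and,
 * if the peer is waiting to read, send it a WROTE notification with bounded
 * retries.
 */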
static int
vmci_transport_notify_pkt_send_post_enqueue(
				struct sock *sk,
				ssize_t written,
				struct vmci_transport_send_notify_data *data)
{
	int err = 0;
	struct vsock_sock *vsk;
	bool sent_wrote = false;
	int retries = 0;

	vsk = vsock_sk(sk);

#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
	/* Detect a wrap-around to maintain queue generation.  Note that this
	 * is safe since we hold the socket lock across the two queue pair
	 * operations.
	 */
	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
		PKT_FIELD(vsk, produce_q_generation)++;
#endif

	if (vmci_transport_notify_waiting_read(vsk)) {
		/* Notify the peer that we have written, retrying the send on
		 * failure up to our maximum value. See the XXX comment for
		 * the corresponding piece of code in
		 * vmci_transport_send_read_notification() for potential
		 * improvements.
		 */
		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
		       !sent_wrote &&
		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			err = vmci_transport_send_wrote(sk);
			if (err >= 0)
				sent_wrote = true;

			retries++;
		}

		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
			pr_err("%p unable to send wrote notify to peer\n", sk);
			return err;
		} else {
#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
			PKT_FIELD(vsk, peer_waiting_read) = false;
#endif
		}
	}
	return err;
}

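/* Dispatch an incoming notify control packet to its handler.  Only WROTE,
 * READ, WAITING_WRITE and WAITING_READ packets are handled here;
 * *pkt_processed reports whether the packet was consumed.
 */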
static void
vmci_transport_notify_pkt_handle_pkt(
			struct sock *sk,
			struct vmci_transport_packet *pkt,
			bool bottom_half,
			struct sockaddr_vm *dst,
			struct sockaddr_vm *src, bool *pkt_processed)
{
	bool processed = false;

	switch (pkt->type) {
	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_READ:
		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
						    dst, src);
		processed = true;
		break;
	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
						   dst, src);
		processed = true;
		break;
	}

	if (pkt_processed)
		*pkt_processed = processed;
}

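/* Once the queue pair size is known, at connection request and negotiation
 * time, the notify windows are clamped to the size of the consume queue;
 * the two helpers below make the same adjustment on each path.
 */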
static void vmci_transport_notify_pkt_process_request(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
	if (vmci_trans(vsk)->consume_size <
		PKT_FIELD(vsk, write_notify_min_window))
		PKT_FIELD(vsk, write_notify_min_window) =
			vmci_trans(vsk)->consume_size;
}

static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
	if (vmci_trans(vsk)->consume_size <
		PKT_FIELD(vsk, write_notify_min_window))
		PKT_FIELD(vsk, write_notify_min_window) =
			vmci_trans(vsk)->consume_size;
}

/* Socket control packet based operations. */
const struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
	.socket_init = vmci_transport_notify_pkt_socket_init,
	.socket_destruct = vmci_transport_notify_pkt_socket_destruct,
	.poll_in = vmci_transport_notify_pkt_poll_in,
	.poll_out = vmci_transport_notify_pkt_poll_out,
	.handle_notify_pkt = vmci_transport_notify_pkt_handle_pkt,
	.recv_init = vmci_transport_notify_pkt_recv_init,
	.recv_pre_block = vmci_transport_notify_pkt_recv_pre_block,
	.recv_pre_dequeue = vmci_transport_notify_pkt_recv_pre_dequeue,
	.recv_post_dequeue = vmci_transport_notify_pkt_recv_post_dequeue,
	.send_init = vmci_transport_notify_pkt_send_init,
	.send_pre_block = vmci_transport_notify_pkt_send_pre_block,
	.send_pre_enqueue = vmci_transport_notify_pkt_send_pre_enqueue,
	.send_post_enqueue = vmci_transport_notify_pkt_send_post_enqueue,
	.process_request = vmci_transport_notify_pkt_process_request,
	.process_negotiate = vmci_transport_notify_pkt_process_negotiate,
};