1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3 * VMware vSockets Driver
4 *
5 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
6 */
7
8 #ifndef __AF_VSOCK_H__
9 #define __AF_VSOCK_H__
10
11 #include <linux/kernel.h>
12 #include <linux/workqueue.h>
13 #include <net/netns/vsock.h>
14 #include <net/sock.h>
15 #include <uapi/linux/vm_sockets.h>
16
17 #include "vsock_addr.h"
18
19 #define LAST_RESERVED_PORT 1023
20
21 #define VSOCK_HASH_SIZE 251
22 extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
23 extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
24 extern spinlock_t vsock_table_lock;
25
26 #define vsock_sk(__sk) ((struct vsock_sock *)__sk)
27 #define sk_vsock(__vsk) (&(__vsk)->sk)
28
/* Per-socket vsock state.  Embeds struct sock as the first member so a
 * struct sock pointer can be downcast with the vsock_sk() macro above.
 */
struct vsock_sock {
	/* sk must be the first member. */
	struct sock sk;
	/* Transport backing this socket; see vsock_assign_transport(). */
	const struct vsock_transport *transport;
	struct sockaddr_vm local_addr;
	struct sockaddr_vm remote_addr;
	/* Links for the global tables of bound and connected sockets. */
	struct list_head bound_table;
	struct list_head connected_table;
	/* Accessed without the socket lock held. This means it can never be
	 * modified outside of socket create or destruct.
	 */
	bool trusted;
	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
					 * cached peer?
					 */
	u32 cached_peer;  /* Context ID of last dgram destination check. */
	const struct cred *owner;
	/* Rest are SOCK_STREAM only. */
	/* Timeout applied while a connect is in flight. */
	long connect_timeout;
	/* Listening socket that this came from. */
	struct sock *listener;
	/* Used for pending list and accept queue during connection handshake.
	 * The listening socket is the head for both lists.  Sockets created
	 * for connection requests are placed in the pending list until they
	 * are connected, at which point they are put in the accept queue list
	 * so they can be accepted in accept().  If accept() cannot accept the
	 * connection, it is marked as rejected so the cleanup function knows
	 * to clean up the socket.
	 */
	struct list_head pending_links;
	struct list_head accept_queue;
	bool rejected;
	/* Deferred work items for connect timeout, pending-connection
	 * cleanup, and delayed close, respectively.
	 */
	struct delayed_work connect_work;
	struct delayed_work pending_work;
	struct delayed_work close_work;
	bool close_work_scheduled;
	u32 peer_shutdown;	/* Peer's shutdown state; presumably SEND/RCV
				 * shutdown flags -- confirm in af_vsock.c.
				 */
	bool sent_request;
	bool ignore_connecting_rst;

	/* Protected by lock_sock(sk) */
	u64 buffer_size;
	u64 buffer_min_size;
	u64 buffer_max_size;

	/* Private to transport. */
	void *trans;
};
78
/* Core helpers implemented in af_vsock.c. */
s64 vsock_connectible_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
void vsock_data_ready(struct sock *sk);
84
85 /**** TRANSPORT ****/
86
/* Opaque per-operation state threaded through the notify_recv_* transport
 * callbacks during a single receive; the transport owns its contents.
 */
struct vsock_transport_recv_notify_data {
	u64 data1; /* Transport-defined. */
	u64 data2; /* Transport-defined. */
	bool notify_on_block;
};
92
/* Opaque per-operation state threaded through the notify_send_* transport
 * callbacks during a single send; the transport owns its contents.
 */
struct vsock_transport_send_notify_data {
	u64 data1; /* Transport-defined. */
	u64 data2; /* Transport-defined. */
};
97
98 /* Transport features flags */
99 /* Transport provides host->guest communication */
100 #define VSOCK_TRANSPORT_F_H2G 0x00000001
101 /* Transport provides guest->host communication */
102 #define VSOCK_TRANSPORT_F_G2H 0x00000002
103 /* Transport provides DGRAM communication */
104 #define VSOCK_TRANSPORT_F_DGRAM 0x00000004
105 /* Transport provides local (loopback) communication */
106 #define VSOCK_TRANSPORT_F_LOCAL 0x00000008
107
/* Callback table a transport implements.  Registered with
 * vsock_core_register() together with VSOCK_TRANSPORT_F_* feature flags.
 * At least msgzerocopy_allow may be NULL (the core NULL-checks it in
 * vsock_msgzerocopy_allow()); which other callbacks are optional depends
 * on the core -- confirm against af_vsock.c before leaving one unset.
 */
struct vsock_transport {
	struct module *module;

	/* Initialize/tear-down socket. */
	int (*init)(struct vsock_sock *, struct vsock_sock *);
	void (*destruct)(struct vsock_sock *);
	void (*release)(struct vsock_sock *);

	/* Cancel all pending packets sent on vsock. */
	int (*cancel_pkt)(struct vsock_sock *vsk);

	/* Connections. */
	int (*connect)(struct vsock_sock *);

	/* DGRAM. */
	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
	int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags);
	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
			     struct msghdr *, size_t len);
	bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port);

	/* STREAM. */
	/* TODO: stream_bind() */
	ssize_t (*stream_dequeue)(struct vsock_sock *, struct msghdr *,
				  size_t len, int flags);
	ssize_t (*stream_enqueue)(struct vsock_sock *, struct msghdr *,
				  size_t len);
	s64 (*stream_has_data)(struct vsock_sock *);
	s64 (*stream_has_space)(struct vsock_sock *);
	u64 (*stream_rcvhiwat)(struct vsock_sock *);
	bool (*stream_is_active)(struct vsock_sock *);
	bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port);

	/* SEQ_PACKET. */
	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
				     int flags);
	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
				 size_t len);
	bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);
	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);

	/* Notification.  Hooks invoked by the core around poll and around
	 * each phase of a blocking receive/send (init, pre-block,
	 * pre/post-(de|en)queue); the *_notify_data cookies carry
	 * transport-private state between the phases.
	 */
	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
	int (*notify_recv_init)(struct vsock_sock *, size_t,
				struct vsock_transport_recv_notify_data *);
	int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
				     struct vsock_transport_recv_notify_data *);
	int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
				       struct vsock_transport_recv_notify_data *);
	int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
					ssize_t, bool,
					struct vsock_transport_recv_notify_data *);
	int (*notify_send_init)(struct vsock_sock *,
				struct vsock_transport_send_notify_data *);
	int (*notify_send_pre_block)(struct vsock_sock *,
				     struct vsock_transport_send_notify_data *);
	int (*notify_send_pre_enqueue)(struct vsock_sock *,
				       struct vsock_transport_send_notify_data *);
	int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
					struct vsock_transport_send_notify_data *);
	/* sk_lock held by the caller */
	void (*notify_buffer_size)(struct vsock_sock *, u64 *);
	int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);

	/* SIOCOUTQ ioctl */
	ssize_t (*unsent_bytes)(struct vsock_sock *vsk);

	/* Shutdown. */
	int (*shutdown)(struct vsock_sock *, int);

	/* Addressing. */
	u32 (*get_local_cid)(void);

	/* Read a single skb */
	int (*read_skb)(struct vsock_sock *, skb_read_actor_t);

	/* Zero-copy.  May be NULL; treated as "not allowed" by
	 * vsock_msgzerocopy_allow().
	 */
	bool (*msgzerocopy_allow)(void);
};
188
189 /**** CORE ****/
190
191 int vsock_core_register(const struct vsock_transport *t, int features);
192 void vsock_core_unregister(const struct vsock_transport *t);
193
194 /* The transport may downcast this to access transport-specific functions */
195 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk);
196
197 /**** UTILS ****/
198
199 /* vsock_table_lock must be held */
__vsock_in_bound_table(struct vsock_sock * vsk)200 static inline bool __vsock_in_bound_table(struct vsock_sock *vsk)
201 {
202 return !list_empty(&vsk->bound_table);
203 }
204
205 /* vsock_table_lock must be held */
__vsock_in_connected_table(struct vsock_sock * vsk)206 static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
207 {
208 return !list_empty(&vsk->connected_table);
209 }
210
/* Pending-list / accept-queue management for listening sockets. */
void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
/* Global bound/connected table manipulation (guarded by vsock_table_lock). */
void vsock_insert_connected(struct vsock_sock *vsk);
void vsock_remove_bound(struct vsock_sock *vsk);
void vsock_remove_connected(struct vsock_sock *vsk);
/* Socket lookups; the *_net variants presumably restrict the match to one
 * network namespace -- confirm against af_vsock.c.
 */
struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
					 struct sockaddr_vm *dst);
struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr,
					 struct net *net);
struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src,
					     struct sockaddr_vm *dst,
					     struct net *net);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(struct vsock_transport *transport,
				     void (*fn)(struct sock *sk));
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
bool vsock_find_cid(unsigned int cid);
void vsock_linger(struct sock *sk);
231
232 /**** TAP ****/
233
/* A registered tap: a net_device that receives copies of vsock traffic
 * via vsock_deliver_tap().  Linked into the tap list through @list.
 */
struct vsock_tap {
	struct net_device *dev;
	struct module *module;
	struct list_head list;
};
239
int vsock_add_tap(struct vsock_tap *vt);
int vsock_remove_tap(struct vsock_tap *vt);
/* Deliver a copy of a packet to all registered taps; @build_skb
 * materializes the skb from the transport-specific @opaque context.
 */
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
/* recvmsg entry points; the __-prefixed forms are presumably internal
 * (lock-held) variants -- confirm against af_vsock.c.
 */
int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
				int flags);
int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			      int flags);
int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			  size_t len, int flags);
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			size_t len, int flags);
251
252 extern struct proto vsock_proto;
253 #ifdef CONFIG_BPF_SYSCALL
254 int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
255 void __init vsock_bpf_build_proto(void);
256 #else
/* !CONFIG_BPF_SYSCALL stub: BPF proto setup becomes a no-op. */
static inline void __init vsock_bpf_build_proto(void)
{}
259 #endif
260
vsock_msgzerocopy_allow(const struct vsock_transport * t)261 static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
262 {
263 return t->msgzerocopy_allow && t->msgzerocopy_allow();
264 }
265
vsock_net_mode(struct net * net)266 static inline enum vsock_net_mode vsock_net_mode(struct net *net)
267 {
268 if (!net)
269 return VSOCK_NET_MODE_GLOBAL;
270
271 return READ_ONCE(net->vsock.mode);
272 }
273
vsock_net_mode_global(struct vsock_sock * vsk)274 static inline bool vsock_net_mode_global(struct vsock_sock *vsk)
275 {
276 return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL;
277 }
278
/* Atomically lock the mode that child namespaces of @net inherit.  The
 * lock word stores mode + 1 so that 0 can mean "unlocked".  Returns true
 * if this call locked the mode, or if it was already locked to the same
 * @mode; returns false if it was already locked to a different mode.
 * child_ns_mode is published with WRITE_ONCE and read with READ_ONCE
 * (see vsock_net_child_mode()).
 */
static inline bool vsock_net_set_child_mode(struct net *net,
					    enum vsock_net_mode mode)
{
	int new_locked = mode + 1;	/* encoded: 0 is reserved for "unlocked" */
	int old_locked = 0; /* unlocked */

	/* On failure try_cmpxchg() writes the current value back into
	 * old_locked, which the idempotence check below relies on.
	 */
	if (try_cmpxchg(&net->vsock.child_ns_mode_locked,
			&old_locked, new_locked)) {
		WRITE_ONCE(net->vsock.child_ns_mode, mode);
		return true;
	}

	return old_locked == new_locked;
}
293
vsock_net_child_mode(struct net * net)294 static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
295 {
296 return READ_ONCE(net->vsock.child_ns_mode);
297 }
298
299 /* Return true if two namespaces pass the mode rules. Otherwise, return false.
300 *
301 * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL.
302 *
303 * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c.
304 */
vsock_net_check_mode(struct net * ns0,struct net * ns1)305 static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1)
306 {
307 enum vsock_net_mode mode0, mode1;
308
309 /* Any vsocks within the same network namespace are always reachable,
310 * regardless of the mode.
311 */
312 if (net_eq(ns0, ns1))
313 return true;
314
315 mode0 = vsock_net_mode(ns0);
316 mode1 = vsock_net_mode(ns1);
317
318 /* Different namespaces are only reachable if they are both
319 * global mode.
320 */
321 return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1;
322 }
323 #endif /* __AF_VSOCK_H__ */
324