/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
 */

#ifndef __AF_VSOCK_H__
#define __AF_VSOCK_H__

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <net/netns/vsock.h>
#include <net/sock.h>
#include <uapi/linux/vm_sockets.h>

#include "vsock_addr.h"

/* Ports up to and including this value are privileged (mirrors the IP
 * well-known port split).
 */
#define LAST_RESERVED_PORT 1023

#define VSOCK_HASH_SIZE         251
/* Global socket tables, protected by vsock_table_lock.  The bind table has
 * one extra bucket (index VSOCK_HASH_SIZE) — see the hashing code in
 * af_vsock.c for how the VMADDR_PORT_ANY case is bucketed.
 */
extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
extern spinlock_t vsock_table_lock;

/* Casts between the core struct sock and the vsock-specific wrapper.
 * Safe because sk is the first member of struct vsock_sock (asserted by the
 * comment in the struct below).
 */
#define vsock_sk(__sk)    ((struct vsock_sock *)__sk)
#define sk_vsock(__vsk)   (&(__vsk)->sk)

struct vsock_sock {
	/* sk must be the first member. */
	struct sock sk;
	const struct vsock_transport *transport;
	struct sockaddr_vm local_addr;
	struct sockaddr_vm remote_addr;
	/* Links for the global tables of bound and connected sockets. */
	struct list_head bound_table;
	struct list_head connected_table;
	/* Accessed without the socket lock held. This means it can never be
	 * modified outside of socket create or destruct.
	 */
	bool trusted;
	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
					 * cached peer?
					 */
	u32 cached_peer;  /* Context ID of last dgram destination check. */
	const struct cred *owner;
	/* Rest are SOCK_STREAM only. */
	long connect_timeout;
	/* Listening socket that this came from. */
	struct sock *listener;
	/* Used for pending list and accept queue during connection handshake.
	 * The listening socket is the head for both lists.  Sockets created
	 * for connection requests are placed in the pending list until they
	 * are connected, at which point they are put in the accept queue list
	 * so they can be accepted in accept().  If accept() cannot accept the
	 * connection, it is marked as rejected so the cleanup function knows
	 * to clean up the socket.
	 */
	struct list_head pending_links;
	struct list_head accept_queue;
	bool rejected;
	struct delayed_work connect_work;
	struct delayed_work pending_work;
	struct delayed_work close_work;
	bool close_work_scheduled;
	u32 peer_shutdown;
	bool sent_request;
	bool ignore_connecting_rst;

	/* Protected by lock_sock(sk) */
	u64 buffer_size;
	u64 buffer_min_size;
	u64 buffer_max_size;

	/* Private to transport. */
	void *trans;
};

s64 vsock_connectible_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_data(struct vsock_sock *vsk);
s64 vsock_stream_has_space(struct vsock_sock *vsk);
struct sock *vsock_create_connected(struct sock *parent);
void vsock_data_ready(struct sock *sk);

/**** TRANSPORT ****/

/* Opaque per-receive cookie passed through the notify_recv_* callbacks;
 * contents are transport-defined except for notify_on_block.
 */
struct vsock_transport_recv_notify_data {
	u64 data1; /* Transport-defined. */
	u64 data2; /* Transport-defined. */
	bool notify_on_block;
};

/* Opaque per-send cookie passed through the notify_send_* callbacks. */
struct vsock_transport_send_notify_data {
	u64 data1; /* Transport-defined. */
	u64 data2; /* Transport-defined. */
};

/* Transport feature flags (passed to vsock_core_register()). */
/* Transport provides host->guest communication */
#define VSOCK_TRANSPORT_F_H2G		0x00000001
/* Transport provides guest->host communication */
#define VSOCK_TRANSPORT_F_G2H		0x00000002
/* Transport provides DGRAM communication */
#define VSOCK_TRANSPORT_F_DGRAM		0x00000004
/* Transport provides local (loopback) communication */
#define VSOCK_TRANSPORT_F_LOCAL	0x00000008

/* Callback table implemented by each vsock transport (virtio, VMCI,
 * Hyper-V, loopback, ...).  Optional callbacks may be NULL; callers are
 * expected to check before invoking (e.g. msgzerocopy_allow below).
 */
struct vsock_transport {
	struct module *module;

	/* Initialize/tear-down socket. */
	int (*init)(struct vsock_sock *, struct vsock_sock *);
	void (*destruct)(struct vsock_sock *);
	void (*release)(struct vsock_sock *);

	/* Cancel all pending packets sent on vsock. */
	int (*cancel_pkt)(struct vsock_sock *vsk);

	/* Connections. */
	int (*connect)(struct vsock_sock *);

	/* DGRAM. */
	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
	int (*dgram_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags);
	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
			     struct msghdr *, size_t len);
	bool (*dgram_allow)(struct vsock_sock *vsk, u32 cid, u32 port);

	/* STREAM. */
	/* TODO: stream_bind() */
	ssize_t (*stream_dequeue)(struct vsock_sock *, struct msghdr *,
				  size_t len, int flags);
	ssize_t (*stream_enqueue)(struct vsock_sock *, struct msghdr *,
				  size_t len);
	s64 (*stream_has_data)(struct vsock_sock *);
	s64 (*stream_has_space)(struct vsock_sock *);
	u64 (*stream_rcvhiwat)(struct vsock_sock *);
	bool (*stream_is_active)(struct vsock_sock *);
	bool (*stream_allow)(struct vsock_sock *vsk, u32 cid, u32 port);

	/* SEQ_PACKET. */
	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
				     int flags);
	int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
				 size_t len);
	bool (*seqpacket_allow)(struct vsock_sock *vsk, u32 remote_cid);
	u32 (*seqpacket_has_data)(struct vsock_sock *vsk);

	/* Notification. */
	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
	int (*notify_recv_init)(struct vsock_sock *, size_t,
		struct vsock_transport_recv_notify_data *);
	int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
		struct vsock_transport_recv_notify_data *);
	int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
		struct vsock_transport_recv_notify_data *);
	int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
		ssize_t, bool, struct vsock_transport_recv_notify_data *);
	int (*notify_send_init)(struct vsock_sock *,
		struct vsock_transport_send_notify_data *);
	int (*notify_send_pre_block)(struct vsock_sock *,
		struct vsock_transport_send_notify_data *);
	int (*notify_send_pre_enqueue)(struct vsock_sock *,
		struct vsock_transport_send_notify_data *);
	int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
		struct vsock_transport_send_notify_data *);
	/* sk_lock held by the caller */
	void (*notify_buffer_size)(struct vsock_sock *, u64 *);
	int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);

	/* SIOCOUTQ ioctl */
	ssize_t (*unsent_bytes)(struct vsock_sock *vsk);

	/* Shutdown. */
	int (*shutdown)(struct vsock_sock *, int);

	/* Addressing. */
	u32 (*get_local_cid)(void);

	/* Check if this transport serves a specific remote CID.
	 * For H2G transports: return true if the CID belongs to a registered
	 * guest. If not implemented, all CIDs > VMADDR_CID_HOST go to H2G.
	 * For G2H transports: return true if the transport can reach arbitrary
	 * CIDs via the hypervisor (i.e. supports the fallback overlay). VMCI
	 * does not implement this as it only serves CIDs 0 and 2.
	 */
	bool (*has_remote_cid)(struct vsock_sock *vsk, u32 remote_cid);

	/* Read a single skb */
	int (*read_skb)(struct vsock_sock *, skb_read_actor_t);

	/* Zero-copy. */
	bool (*msgzerocopy_allow)(void);
};

/**** CORE ****/

int vsock_core_register(const struct vsock_transport *t, int features);
void vsock_core_unregister(const struct vsock_transport *t);

/* The transport may downcast this to access transport-specific functions */
const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk);

/**** UTILS ****/

/* True if vsk is linked into the global bind table.
 * vsock_table_lock must be held.
 */
static inline bool __vsock_in_bound_table(struct vsock_sock *vsk)
{
	return !list_empty(&vsk->bound_table);
}

/* True if vsk is linked into the global connected table.
 * vsock_table_lock must be held.
 */
static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
{
	return !list_empty(&vsk->connected_table);
}

void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
void vsock_insert_connected(struct vsock_sock *vsk);
void vsock_remove_bound(struct vsock_sock *vsk);
void vsock_remove_connected(struct vsock_sock *vsk);
struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
					 struct sockaddr_vm *dst);
/* _net variants additionally match on the socket's network namespace. */
struct sock *vsock_find_bound_socket_net(struct sockaddr_vm *addr,
					 struct net *net);
struct sock *vsock_find_connected_socket_net(struct sockaddr_vm *src,
					     struct sockaddr_vm *dst,
					     struct net *net);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(struct vsock_transport *transport,
				     void (*fn)(struct sock *sk));
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
bool vsock_find_cid(unsigned int cid);
void vsock_linger(struct sock *sk);

/**** TAP ****/

/* One packet-tap registration (e.g. for tcpdump on a vsockmon device). */
struct vsock_tap {
	struct net_device *dev;
	struct module *module;
	struct list_head list;
};

int vsock_add_tap(struct vsock_tap *vt);
int vsock_remove_tap(struct vsock_tap *vt);
/* Deliver a packet to all registered taps; build_skb is a callback that
 * materializes the skb from the transport's opaque packet representation.
 */
void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque);
int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
				int flags);
int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			      int flags);
int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			  size_t len, int flags);
int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			size_t len, int flags);

extern struct proto vsock_proto;
#ifdef CONFIG_BPF_SYSCALL
int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init vsock_bpf_build_proto(void);
#else
/* No-op stub when BPF socket support is compiled out. */
static inline void __init vsock_bpf_build_proto(void)
{}
#endif

/* MSG_ZEROCOPY is allowed only if the transport opts in; the callback is
 * optional, so a NULL check guards the call.
 */
static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
{
	return t->msgzerocopy_allow && t->msgzerocopy_allow();
}

/* Namespace mode for net; a NULL namespace is treated as global.
 * READ_ONCE pairs with whoever writes net->vsock.mode — this is read
 * without the table lock.
 */
static inline enum vsock_net_mode vsock_net_mode(struct net *net)
{
	if (!net)
		return VSOCK_NET_MODE_GLOBAL;

	return READ_ONCE(net->vsock.mode);
}

/* True if vsk's owning namespace is in global mode. */
static inline bool vsock_net_mode_global(struct vsock_sock *vsk)
{
	return vsock_net_mode(sock_net(sk_vsock(vsk))) == VSOCK_NET_MODE_GLOBAL;
}

/* Latch the mode that child namespaces of net will inherit.
 *
 * child_ns_mode_locked doubles as a lock word: 0 means "not yet set", and a
 * successful try_cmpxchg() stores mode + 1 (so that even mode value 0 maps
 * to a nonzero lock value) and publishes the mode.  On cmpxchg failure,
 * old_locked holds the value that won the race, so the function still
 * returns true when the already-latched mode equals the requested one
 * (idempotent re-set) and false on a conflicting mode.
 *
 * NOTE(review): the inheritance semantics of child_ns_mode (i.e. that it
 * applies at child-namespace creation) are implemented in af_vsock.c /
 * the netns setup path — confirm there, not visible in this header.
 */
static inline bool vsock_net_set_child_mode(struct net *net,
					    enum vsock_net_mode mode)
{
	int new_locked = mode + 1;
	int old_locked = 0; /* unlocked */

	if (try_cmpxchg(&net->vsock.child_ns_mode_locked,
			&old_locked, new_locked)) {
		WRITE_ONCE(net->vsock.child_ns_mode, mode);
		return true;
	}

	return old_locked == new_locked;
}

/* Read the mode that children of net will inherit (lockless read). */
static inline enum vsock_net_mode vsock_net_child_mode(struct net *net)
{
	return READ_ONCE(net->vsock.child_ns_mode);
}

/* Return true if two namespaces pass the mode rules. Otherwise, return false.
 *
 * A NULL namespace is treated as VSOCK_NET_MODE_GLOBAL.
 *
 * Read more about modes in the comment header of net/vmw_vsock/af_vsock.c.
 */
static inline bool vsock_net_check_mode(struct net *ns0, struct net *ns1)
{
	enum vsock_net_mode mode0, mode1;

	/* Any vsocks within the same network namespace are always reachable,
	 * regardless of the mode.
	 */
	if (net_eq(ns0, ns1))
		return true;

	mode0 = vsock_net_mode(ns0);
	mode1 = vsock_net_mode(ns1);

	/* Different namespaces are only reachable if they are both
	 * global mode.
	 */
	return mode0 == VSOCK_NET_MODE_GLOBAL && mode0 == mode1;
}
#endif /* __AF_VSOCK_H__ */