xref: /linux/include/linux/vmw_vmci_defs.h (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * VMware VMCI Driver
4  *
5  * Copyright (C) 2012 VMware, Inc. All rights reserved.
6  */
7 
8 #ifndef _VMW_VMCI_DEF_H_
9 #define _VMW_VMCI_DEF_H_
10 
11 #include <linux/atomic.h>
12 #include <linux/bits.h>
13 
/* Offsets of the VMCI device registers. */
#define VMCI_STATUS_ADDR		0x00
#define VMCI_CONTROL_ADDR		0x04
#define VMCI_ICR_ADDR			0x08
#define VMCI_IMR_ADDR			0x0c
#define VMCI_DATA_OUT_ADDR		0x10
#define VMCI_DATA_IN_ADDR		0x14
#define VMCI_CAPS_ADDR			0x18
#define VMCI_RESULT_LOW_ADDR		0x1c
#define VMCI_RESULT_HIGH_ADDR		0x20
/* High/low halves of the DMA datagram buffer address (send, receive). */
#define VMCI_DATA_OUT_LOW_ADDR		0x24
#define VMCI_DATA_OUT_HIGH_ADDR		0x28
#define VMCI_DATA_IN_LOW_ADDR		0x2c
#define VMCI_DATA_IN_HIGH_ADDR		0x30
#define VMCI_GUEST_PAGE_SHIFT		0x34
29 
/* Upper bound on the number of VMCI devices. */
#define VMCI_MAX_DEVICES 1

/* Bits of the status register. */
#define VMCI_STATUS_INT_ON		BIT(0)

/* Bits of the control register. */
#define VMCI_CONTROL_RESET		BIT(0)
#define VMCI_CONTROL_INT_ENABLE		BIT(1)
#define VMCI_CONTROL_INT_DISABLE	BIT(2)

/* Bits of the capabilities register. */
#define VMCI_CAPS_HYPERCALL		BIT(0)
#define VMCI_CAPS_GUESTCALL		BIT(1)
#define VMCI_CAPS_DATAGRAM		BIT(2)
#define VMCI_CAPS_NOTIFICATIONS		BIT(3)
#define VMCI_CAPS_PPN64			BIT(4)
#define VMCI_CAPS_DMA_DATAGRAM		BIT(5)

/* Bits of the Interrupt Cause register (ICR). */
#define VMCI_ICR_DATAGRAM		BIT(0)
#define VMCI_ICR_NOTIFICATION		BIT(1)
#define VMCI_ICR_DMA_DATAGRAM		BIT(2)

/* Bits of the Interrupt Mask register (IMR); same layout as the ICR bits. */
#define VMCI_IMR_DATAGRAM		BIT(0)
#define VMCI_IMR_NOTIFICATION		BIT(1)
#define VMCI_IMR_DMA_DATAGRAM		BIT(2)
58 
/*
 * Number of MSI/MSI-X interrupt vectors exposed by the device.
 * A device with VMCI_CAPS_DMA_DATAGRAM provides
 * VMCI_MAX_INTRS_DMA_DATAGRAM vectors; any other device provides
 * only VMCI_MAX_INTRS_NOTIFICATION.  VMCI_MAX_INTRS is the larger
 * of the two.
 */
#define VMCI_MAX_INTRS_NOTIFICATION	2
#define VMCI_MAX_INTRS_DMA_DATAGRAM	3
#define VMCI_MAX_INTRS			VMCI_MAX_INTRS_DMA_DATAGRAM
68 
/*
 * Interrupt vector indices.  Each ICR bit has a matching entry here;
 * the value is the slot in the vector array (equivalently, the
 * message ID), not a bit mask.
 */
enum {
	VMCI_INTR_DATAGRAM	= 0,
	VMCI_INTR_NOTIFICATION	= 1,
	VMCI_INTR_DMA_DATAGRAM	= 2,
};
78 
/*
 * One VMCI device may use at most 128MB of memory for queue pairs.
 * A queue pair occupies at least two pages, so this memory cap also
 * bounds how many queue pairs a guest can create.
 */
#define VMCI_MAX_GUEST_QP_MEMORY ((size_t)128 * 1024 * 1024)
#define VMCI_MAX_GUEST_QP_COUNT  (VMCI_MAX_GUEST_QP_MEMORY / PAGE_SIZE / 2)

/*
 * The doorbell bitmap page holds one doorbell per byte, hence no more
 * than PAGE_SIZE doorbells can exist.
 */
#define VMCI_MAX_GUEST_DOORBELL_COUNT PAGE_SIZE

/*
 * Queues whose data pages are pre-mapped are kept small to avoid
 * pinning too much kernel memory (especially on vmkernel).  A queue
 * pair is capped at 32 KB, i.e. 16 KB per queue when symmetrical.
 */
#define VMCI_MAX_PINNED_QP_MEMORY ((size_t)32 * 1024)
100 
/*
 * A VMCI device that supports MMIO register access requests 256KB
 * for BAR1; a device that supports MSI/MSI-X only requests 8KB.
 * Layout of the 256KB region:
 * - first 128KB: MSI/MSI-X,
 * - next 64KB:   MMIO register access,
 * - last 64KB:   unused.
 */
#define VMCI_WITH_MMIO_ACCESS_BAR_SIZE	((size_t)256 * 1024)
#define VMCI_MMIO_ACCESS_OFFSET		((size_t)128 * 1024)
#define VMCI_MMIO_ACCESS_SIZE		((size_t)64 * 1024)
112 
113 /*
114  * For VMCI devices supporting the VMCI_CAPS_DMA_DATAGRAM capability, the
115  * sending and receiving of datagrams can be performed using DMA to/from
116  * a driver allocated buffer.
117  * Sending and receiving will be handled as follows:
118  * - when sending datagrams, the driver initializes the buffer where the
119  *   data part will refer to the outgoing VMCI datagram, sets the busy flag
120  *   to 1 and writes the address of the buffer to VMCI_DATA_OUT_HIGH_ADDR
121  *   and VMCI_DATA_OUT_LOW_ADDR. Writing to VMCI_DATA_OUT_LOW_ADDR triggers
122  *   the device processing of the buffer. When the device has processed the
123  *   buffer, it will write the result value to the buffer and then clear the
124  *   busy flag.
125  * - when receiving datagrams, the driver initializes the buffer where the
126  *   data part will describe the receive buffer, clears the busy flag and
127  *   writes the address of the buffer to VMCI_DATA_IN_HIGH_ADDR and
128  *   VMCI_DATA_IN_LOW_ADDR. Writing to VMCI_DATA_IN_LOW_ADDR triggers the
129  *   device processing of the buffer. The device will copy as many available
130  *   datagrams into the buffer as possible, and then sets the busy flag.
131  *   When the busy flag is set, the driver will process the datagrams in the
132  *   buffer.
133  */
/* Header placed at the start of a DMA datagram send/receive buffer. */
struct vmci_data_in_out_header {
	uint32_t busy;		/* Busy flag shared by driver and device. */
	uint32_t opcode;
	uint32_t size;
	uint32_t rsvd;		/* Presumably reserved -- TODO confirm. */
	uint64_t result;	/* Result value written by the device. */
};

/* One address/size pair. */
struct vmci_sg_elem {
	uint64_t addr;
	uint64_t size;
};
146 
/*
 * VMCI reserved hypervisor datagram resource IDs.
 *
 * The VMX offers a fixed set of resource IDs.  Because the number of
 * datagram handle creators is known statically, the implementation
 * stays simple; should a new caller need a slot when none is left,
 * the maximum can be raised by hand.
 */
enum {
	VMCI_RESOURCES_QUERY		= 0,
	VMCI_GET_CONTEXT_ID		= 1,
	VMCI_SET_NOTIFY_BITMAP		= 2,
	VMCI_DOORBELL_LINK		= 3,
	VMCI_DOORBELL_UNLINK		= 4,
	VMCI_DOORBELL_NOTIFY		= 5,

	/*
	 * The next two IDs became obsolete when VM to VM communication
	 * was removed.
	 */
	VMCI_DATAGRAM_REQUEST_MAP	= 6,
	VMCI_DATAGRAM_REMOVE_MAP	= 7,

	VMCI_EVENT_SUBSCRIBE		= 8,
	VMCI_EVENT_UNSUBSCRIBE		= 9,
	VMCI_QUEUEPAIR_ALLOC		= 10,
	VMCI_QUEUEPAIR_DETACH		= 11,

	/*
	 * 12 is skipped: it was VMCI_VSOCK_VMX_LOOKUP in Fusion 3.0/3.1,
	 * WS 7.0/7.1 and ESX 4.1.
	 */
	VMCI_HGFS_TRANSPORT		= 13,
	VMCI_UNITY_PBRPC_REGISTER	= 14,
	VMCI_RPC_PRIVILEGED		= 15,
	VMCI_RPC_UNPRIVILEGED		= 16,
	VMCI_RESOURCE_MAX		= 17,
};
184 
185 /*
186  * struct vmci_handle - Ownership information structure
187  * @context:    The VMX context ID.
188  * @resource:   The resource ID (used for locating in resource hash).
189  *
190  * The vmci_handle structure is used to track resources used within
191  * vmw_vmci.
192  */
193 struct vmci_handle {
194 	u32 context;
195 	u32 resource;
196 };
197 
/* Build a struct vmci_handle value from a context ID and a resource ID. */
#define vmci_make_handle(_cid, _rid)					\
	((struct vmci_handle){ .context = (_cid), .resource = (_rid) })
200 
vmci_handle_is_equal(struct vmci_handle h1,struct vmci_handle h2)201 static inline bool vmci_handle_is_equal(struct vmci_handle h1,
202 					struct vmci_handle h2)
203 {
204 	return h1.context == h2.context && h1.resource == h2.resource;
205 }
206 
207 #define VMCI_INVALID_ID ~0
208 static const struct vmci_handle VMCI_INVALID_HANDLE = {
209 	.context = VMCI_INVALID_ID,
210 	.resource = VMCI_INVALID_ID
211 };
212 
vmci_handle_is_invalid(struct vmci_handle h)213 static inline bool vmci_handle_is_invalid(struct vmci_handle h)
214 {
215 	return vmci_handle_is_equal(h, VMCI_INVALID_HANDLE);
216 }
217 
218 /*
219  * The below defines can be used to send anonymous requests.
220  * This also indicates that no response is expected.
221  */
222 #define VMCI_ANON_SRC_CONTEXT_ID   VMCI_INVALID_ID
223 #define VMCI_ANON_SRC_RESOURCE_ID  VMCI_INVALID_ID
224 static const struct vmci_handle __maybe_unused VMCI_ANON_SRC_HANDLE = {
225 	.context = VMCI_ANON_SRC_CONTEXT_ID,
226 	.resource = VMCI_ANON_SRC_RESOURCE_ID
227 };
228 
/* Context IDs below this limit (the lowest 16) are for internal use. */
#define VMCI_RESERVED_CID_LIMIT ((u32) 16)

/*
 * Context ID of the hypervisor; a VM uses it to call hypervisor
 * supplied services.
 */
#define VMCI_HYPERVISOR_CONTEXT_ID 0

/*
 * Context ID of the logical "well-known" context, which held a set
 * of well-known services.  Obsolete.
 */
#define VMCI_WELL_KNOWN_CONTEXT_ID 1

/* Context ID of host endpoints. */
#define VMCI_HOST_CONTEXT_ID  2

/*
 * True when _cid is not VMCI_INVALID_ID and is greater than
 * VMCI_HOST_CONTEXT_ID.  _cid is evaluated twice, so it must not
 * have side effects.
 */
#define VMCI_CONTEXT_IS_VM(_cid) (VMCI_INVALID_ID != (_cid) &&		\
				  (_cid) > VMCI_HOST_CONTEXT_ID)

/*
 * Resource ID to combine with a context ID in vmci_make_handle()
 * when the handle should designate the context itself.
 */
#define VMCI_CONTEXT_RESOURCE_ID 0
257 
/*
 * VMCI status codes: zero and positive values report success,
 * negative values report errors.
 */
enum {
	VMCI_SUCCESS				= 0,
	VMCI_SUCCESS_ENTRY_DEAD			= 1,
	VMCI_SUCCESS_ACCESS_GRANTED		= 2,
	VMCI_SUCCESS_LAST_DETACH		= 3,
	VMCI_SUCCESS_QUEUEPAIR_CREATE		= 4,
	VMCI_SUCCESS_QUEUEPAIR_ATTACH		= 5,

	VMCI_ERROR_INVALID_RESOURCE		= -1,
	VMCI_ERROR_INVALID_ARGS			= -2,
	VMCI_ERROR_NO_MEM			= -3,
	VMCI_ERROR_DATAGRAM_FAILED		= -4,
	VMCI_ERROR_MORE_DATA			= -5,
	VMCI_ERROR_NO_MORE_DATAGRAMS		= -6,
	VMCI_ERROR_NO_ACCESS			= -7,
	VMCI_ERROR_NO_HANDLE			= -8,
	VMCI_ERROR_DUPLICATE_ENTRY		= -9,
	VMCI_ERROR_DST_UNREACHABLE		= -10,
	VMCI_ERROR_PAYLOAD_TOO_LARGE		= -11,
	VMCI_ERROR_INVALID_PRIV			= -12,
	VMCI_ERROR_GENERIC			= -13,
	VMCI_ERROR_PAGE_ALREADY_SHARED		= -14,
	VMCI_ERROR_CANNOT_SHARE_PAGE		= -15,
	VMCI_ERROR_CANNOT_UNSHARE_PAGE		= -16,
	VMCI_ERROR_NO_PROCESS			= -17,
	VMCI_ERROR_NO_DATAGRAM			= -18,
	VMCI_ERROR_NO_RESOURCES			= -19,
	VMCI_ERROR_UNAVAILABLE			= -20,
	VMCI_ERROR_NOT_FOUND			= -21,
	VMCI_ERROR_ALREADY_EXISTS		= -22,
	VMCI_ERROR_NOT_PAGE_ALIGNED		= -23,
	VMCI_ERROR_INVALID_SIZE			= -24,
	VMCI_ERROR_REGION_ALREADY_SHARED	= -25,
	VMCI_ERROR_TIMEOUT			= -26,
	VMCI_ERROR_DATAGRAM_INCOMPLETE		= -27,
	VMCI_ERROR_INCORRECT_IRQL		= -28,
	VMCI_ERROR_EVENT_UNKNOWN		= -29,
	VMCI_ERROR_OBSOLETE			= -30,
	VMCI_ERROR_QUEUEPAIR_MISMATCH		= -31,
	VMCI_ERROR_QUEUEPAIR_NOTSET		= -32,
	VMCI_ERROR_QUEUEPAIR_NOTOWNER		= -33,
	VMCI_ERROR_QUEUEPAIR_NOTATTACHED	= -34,
	VMCI_ERROR_QUEUEPAIR_NOSPACE		= -35,
	VMCI_ERROR_QUEUEPAIR_NODATA		= -36,
	VMCI_ERROR_BUSMEM_INVALIDATION		= -37,
	VMCI_ERROR_MODULE_NOT_LOADED		= -38,
	VMCI_ERROR_DEVICE_NOT_FOUND		= -39,
	VMCI_ERROR_QUEUEPAIR_NOT_READY		= -40,
	VMCI_ERROR_WOULD_BLOCK			= -41,

	/* Error codes returned by VMCI clients should lie in this range. */
	VMCI_ERROR_CLIENT_MIN			= -500,
	VMCI_ERROR_CLIENT_MAX			= -550,

	/* Internal error codes. */
	VMCI_SHAREDMEM_ERROR_BAD_CONTEXT	= -1000,
};
317 
/* Reserved VMCI event IDs. */
enum {
	/* Guest endpoints only. */
	VMCI_EVENT_CTX_ID_UPDATE  = 0,

	/* Guest and host. */
	VMCI_EVENT_CTX_REMOVED	  = 1,

	/* Guest endpoints only. */
	VMCI_EVENT_QP_RESUMED	  = 2,

	/* Guest and host. */
	VMCI_EVENT_QP_PEER_ATTACH = 3,

	/* Guest and host. */
	VMCI_EVENT_QP_PEER_DETACH = 4,

	/*
	 * VMX and vmk.  On vmk the event carries the Context
	 * payload type.
	 */
	VMCI_EVENT_MEM_ACCESS_ON  = 5,

	/* VMX and vmk, with the same payload type as MEM_ACCESS_ON. */
	VMCI_EVENT_MEM_ACCESS_OFF = 6,

	/* Number of event IDs defined above. */
	VMCI_EVENT_MAX		  = 7,
};
348 
/*
 * Some reserved events are for the VMX alone; guest and host kernel
 * endpoints must not use them.  All remaining events may be
 * subscribed to by both host and guest endpoints, which keeps the
 * API identical on both sides.
 *
 * VMCI_EVENT_VALID_VMX(): _event is one of the VMX-only events.
 * VMCI_EVENT_VALID():     _event is below VMCI_EVENT_MAX and is not
 *                         VMX-only.
 * Both macros evaluate _event more than once.
 */
#define VMCI_EVENT_VALID_VMX(_event) ((_event) == VMCI_EVENT_MEM_ACCESS_ON || \
				      (_event) == VMCI_EVENT_MEM_ACCESS_OFF)

#define VMCI_EVENT_VALID(_event) ((_event) < VMCI_EVENT_MAX &&		\
				  !VMCI_EVENT_VALID_VMX(_event))
361 
/* Guest datagram resource IDs that are reserved. */
#define VMCI_EVENT_HANDLER	0
364 
/*
 * Coarse-grained VMCI privileges, assigned per context or per host
 * process/endpoint.  An entity carrying the restricted flag may only
 * interact with the hypervisor and with trusted entities.
 */
enum {
	VMCI_NO_PRIVILEGE_FLAGS		  = 0,
	VMCI_PRIVILEGE_FLAG_RESTRICTED	  = 1,
	VMCI_PRIVILEGE_FLAG_TRUSTED	  = 2,
	VMCI_PRIVILEGE_ALL_FLAGS	  = (VMCI_PRIVILEGE_FLAG_RESTRICTED |
					     VMCI_PRIVILEGE_FLAG_TRUSTED),
	VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS = VMCI_NO_PRIVILEGE_FLAGS,
	VMCI_LEAST_PRIVILEGE_FLAGS	  = VMCI_PRIVILEGE_FLAG_RESTRICTED,
	VMCI_MAX_PRIVILEGE_FLAGS	  = VMCI_PRIVILEGE_FLAG_TRUSTED,
};
380 
/* Resource IDs 0..VMCI_RESERVED_RESOURCE_ID_MAX (inclusive) are reserved. */
#define VMCI_RESERVED_RESOURCE_ID_MAX	1023
383 
/*
 * Driver version.
 *
 * Bump the major version for every incompatible change.
 * Compatibility is required in both directions: old driver with new
 * executable, and new driver with old executable.
 *
 * A version value carries the major number above bit
 * VMCI_VERSION_SHIFT_WIDTH and the minor number in the low 16 bits.
 */

/* VMCI_VERSION_SHIFT_WIDTH must never change. */
#define VMCI_VERSION_SHIFT_WIDTH 16
#define VMCI_MAKE_VERSION(_major, _minor)			\
	(((_major) << VMCI_VERSION_SHIFT_WIDTH) | (u16)(_minor))

#define VMCI_VERSION_MAJOR(v)	((u32)(v) >> VMCI_VERSION_SHIFT_WIDTH)
#define VMCI_VERSION_MINOR(v)	((u16)(v))
398 #define VMCI_VERSION_MINOR(v)  ((u16) (v))
399 
400 /*
401  * VMCI_VERSION is always the current version.  Subsequently listed
402  * versions are ways of detecting previous versions of the connecting
403  * application (i.e., VMX).
404  *
405  * VMCI_VERSION_NOVMVM: This version removed support for VM to VM
406  * communication.
407  *
408  * VMCI_VERSION_NOTIFY: This version introduced doorbell notification
409  * support.
410  *
411  * VMCI_VERSION_HOSTQP: This version introduced host end point support
412  * for hosted products.
413  *
414  * VMCI_VERSION_PREHOSTQP: This is the version prior to the adoption of
415  * support for host end-points.
416  *
417  * VMCI_VERSION_PREVERS2: This fictional version number is intended to
418  * represent the version of a VMX which doesn't call into the driver
419  * with ioctl VERSION2 and thus doesn't establish its version with the
420  * driver.
421  */
422 
/* Current version first, then the historical versions, newest to oldest. */
#define VMCI_VERSION			VMCI_VERSION_NOVMVM
#define VMCI_VERSION_NOVMVM		VMCI_MAKE_VERSION(11, 0)
#define VMCI_VERSION_NOTIFY		VMCI_MAKE_VERSION(10, 0)
#define VMCI_VERSION_HOSTQP		VMCI_MAKE_VERSION(9, 0)
#define VMCI_VERSION_PREHOSTQP		VMCI_MAKE_VERSION(8, 0)
#define VMCI_VERSION_PREVERS2		VMCI_MAKE_VERSION(1, 0)
429 
/*
 * Pack the first three elements of the version array _p into one
 * value: bits 31..24 hold _p[0] & 0xFF, bits 23..16 hold
 * _p[1] & 0xFF, and _p[2] is OR-ed in unmasked.
 *
 * The masked bytes are converted to unsigned int before shifting.
 * Shifting them as (signed) int would move a set bit into the sign
 * position whenever _p[0] >= 0x80, which is undefined behaviour in
 * ISO C.
 *
 * _p is evaluated three times; do not pass an expression with side
 * effects.
 */
#define VMCI_SOCKETS_MAKE_VERSION(_p)					\
	(((unsigned int)((_p)[0] & 0xFF) << 24) |			\
	 ((unsigned int)((_p)[1] & 0xFF) << 16) |			\
	 ((_p)[2]))
432 
433 /*
434  * The VMCI IOCTLs.  We use identity code 7, as noted in ioctl-number.h, and
435  * we start at sequence 9f.  This gives us the same values that our shipping
436  * products use, starting at 1951, provided we leave out the direction and
437  * structure size.  Note that VMMon occupies the block following us, starting
438  * at 2001.
439  */
/* VMCI ioctl numbers: identity code 7, sequence numbers from 0x9f. */
#define IOCTL_VMCI_VERSION                  _IO(7, 0x9f)	/* 1951 */
#define IOCTL_VMCI_INIT_CONTEXT             _IO(7, 0xa0)
#define IOCTL_VMCI_QUEUEPAIR_SETVA          _IO(7, 0xa4)
#define IOCTL_VMCI_NOTIFY_RESOURCE          _IO(7, 0xa5)
#define IOCTL_VMCI_NOTIFICATIONS_RECEIVE    _IO(7, 0xa6)
#define IOCTL_VMCI_VERSION2                 _IO(7, 0xa7)
#define IOCTL_VMCI_QUEUEPAIR_ALLOC          _IO(7, 0xa8)
#define IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE    _IO(7, 0xa9)
#define IOCTL_VMCI_QUEUEPAIR_DETACH         _IO(7, 0xaa)
#define IOCTL_VMCI_DATAGRAM_SEND            _IO(7, 0xab)
#define IOCTL_VMCI_DATAGRAM_RECEIVE         _IO(7, 0xac)
#define IOCTL_VMCI_CTX_ADD_NOTIFICATION     _IO(7, 0xaf)
#define IOCTL_VMCI_CTX_REMOVE_NOTIFICATION  _IO(7, 0xb0)
#define IOCTL_VMCI_CTX_GET_CPT_STATE        _IO(7, 0xb1)
#define IOCTL_VMCI_CTX_SET_CPT_STATE        _IO(7, 0xb2)
#define IOCTL_VMCI_GET_CONTEXT_ID           _IO(7, 0xb3)
#define IOCTL_VMCI_SOCKETS_VERSION          _IO(7, 0xb4)
#define IOCTL_VMCI_SOCKETS_GET_AF_VALUE     _IO(7, 0xb8)
#define IOCTL_VMCI_SOCKETS_GET_LOCAL_CID    _IO(7, 0xb9)
#define IOCTL_VMCI_SET_NOTIFY               _IO(7, 0xcb)	/* 1995 */
/*IOCTL_VMMON_START                         _IO(7, 0xd1)*/	/* 2001 */
461 
462 /*
463  * struct vmci_queue_header - VMCI Queue Header information.
464  *
465  * A Queue cannot stand by itself as designed.  Each Queue's header
466  * contains a pointer into itself (the producer_tail) and into its peer
467  * (consumer_head).  The reason for the separation is one of
468  * accessibility: Each end-point can modify two things: where the next
469  * location to enqueue is within its produce_q (producer_tail); and
470  * where the next dequeue location is in its consume_q (consumer_head).
471  *
472  * An end-point cannot modify the pointers of its peer (guest to
473  * guest; NOTE that in the host both queue headers are mapped r/w).
474  * But, each end-point needs read access to both Queue header
475  * structures in order to determine how much space is used (or left)
476  * in the Queue.  This is because for an end-point to know how full
477  * its produce_q is, it needs to use the consumer_head that points into
478  * the produce_q but -that- consumer_head is in the Queue header for
 * that end-point's consume_q.
480  *
481  * Thoroughly confused?  Sorry.
482  *
483  * producer_tail: the point to enqueue new entrants.  When you approach
484  * a line in a store, for example, you walk up to the tail.
485  *
486  * consumer_head: the point in the queue from which the next element is
487  * dequeued.  In other words, who is next in line is he who is at the
488  * head of the line.
489  *
490  * Also, producer_tail points to an empty byte in the Queue, whereas
491  * consumer_head points to a valid byte of data (unless producer_tail ==
492  * consumer_head in which case consumer_head does not point to a valid
493  * byte of data).
494  *
495  * For a queue of buffer 'size' bytes, the tail and head pointers will be in
496  * the range [0, size-1].
497  *
498  * If produce_q_header->producer_tail == consume_q_header->consumer_head
499  * then the produce_q is empty.
500  */
501 struct vmci_queue_header {
502 	/* All fields are 64bit and aligned. */
503 	struct vmci_handle handle;	/* Identifier. */
504 	u64 producer_tail;	/* Offset in this queue. */
505 	u64 consumer_head;	/* Offset in peer queue. */
506 };
507 
508 /*
509  * struct vmci_datagram - Base struct for vmci datagrams.
510  * @dst:        A vmci_handle that tracks the destination of the datagram.
511  * @src:        A vmci_handle that tracks the source of the datagram.
512  * @payload_size:       The size of the payload.
513  *
514  * vmci_datagram structs are used when sending vmci datagrams.  They include
515  * the necessary source and destination information to properly route
516  * the information along with the size of the package.
517  */
518 struct vmci_datagram {
519 	struct vmci_handle dst;
520 	struct vmci_handle src;
521 	u64 payload_size;
522 };
523 
/*
 * Datagram handle flags.
 *
 * VMCI_FLAG_WELLKNOWN_DG_HND: create a well-known handle rather than
 *                             a per context handle.
 * VMCI_FLAG_ANYCID_DG_HND:    NOTE(review): not described in this
 *                             header; meaning to be confirmed.
 * VMCI_FLAG_DG_DELAYED_CB:    defer datagram delivery so the datagram
 *                             callback runs in a delayed context
 *                             (not interrupt context).
 */
#define VMCI_FLAG_DG_NONE		0
#define VMCI_FLAG_WELLKNOWN_DG_HND	BIT(0)
#define VMCI_FLAG_ANYCID_DG_HND		BIT(1)
#define VMCI_FLAG_DG_DELAYED_CB		BIT(2)
533 
/*
 * Largest VMCI datagram supported for routable datagrams.  Datagrams
 * addressed to the hypervisor may exceed this.
 */
#define VMCI_MAX_DG_SIZE (17 * 4096)

/* Size of the fixed datagram header. */
#define VMCI_DG_HEADERSIZE (sizeof(struct vmci_datagram))

/* Largest payload that still fits in a routable datagram. */
#define VMCI_MAX_DG_PAYLOAD_SIZE (VMCI_MAX_DG_SIZE - \
				  sizeof(struct vmci_datagram))

/* Address of the payload, which starts right after the header. */
#define VMCI_DG_PAYLOAD(_dg) ((void *)((char *)(_dg) +			\
				       sizeof(struct vmci_datagram)))

/* Header plus payload size of _dg, exact and rounded up to 8 bytes. */
#define VMCI_DG_SIZE(_dg) (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size)
#define VMCI_DG_SIZE_ALIGNED(_dg) ((VMCI_DG_SIZE(_dg) + 7) & (~((size_t) 0x7)))

#define VMCI_MAX_DATAGRAM_QUEUE_SIZE (VMCI_MAX_DG_SIZE * 2)
546 #define VMCI_MAX_DATAGRAM_QUEUE_SIZE (VMCI_MAX_DG_SIZE * 2)
547 
548 struct vmci_event_payload_qp {
549 	struct vmci_handle handle;  /* queue_pair handle. */
550 	u32 peer_id;		    /* Context id of attaching/detaching VM. */
551 	u32 _pad;
552 };
553 
/* Flags accepted by the VMCI queue_pair API. */
enum {
	/* Allocation fails unless the peer already created the QP. */
	VMCI_QPFLAG_ATTACH_ONLY	= 0x1,

	/* Attaches are accepted from the local context only. */
	VMCI_QPFLAG_LOCAL	= 0x2,

	/* The host will not block while the guest is quiesced. */
	VMCI_QPFLAG_NONBLOCK	= 0x4,

	/* Pin the data pages in ESX.  Used together with NONBLOCK. */
	VMCI_QPFLAG_PINNED	= 0x8,

	/* Union of all flags; extend it whenever a flag is added. */
	VMCI_QP_ALL_FLAGS	= (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QPFLAG_LOCAL |
				   VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED),

	/* Convenience combinations. */
	VMCI_QP_ASYMM		= (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED),
	VMCI_QP_ASYMM_PEER	= (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QP_ASYMM),
};
576 
577 /*
578  * We allow at least 1024 more event datagrams from the hypervisor past the
579  * normally allowed datagrams pending for a given context.  We define this
580  * limit on event datagrams from the hypervisor to guard against DoS attack
581  * from a malicious VM which could repeatedly attach to and detach from a queue
582  * pair, causing events to be queued at the destination VM.  However, the rate
583  * at which such events can be generated is small since it requires a VM exit
584  * and handling of queue pair attach/detach call at the hypervisor.  Event
585  * datagrams may be queued up at the destination VM if it has interrupts
586  * disabled or if it is not draining events for some other reason.  1024
587  * datagrams is a grossly conservative estimate of the time for which
588  * interrupts may be disabled in the destination VM, but at the same time does
589  * not exacerbate the memory pressure problem on the host by much (size of each
590  * event datagram is small).
591  */
/* Datagram queue limit plus room for 1024 maximum-size event datagrams. */
#define VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE				\
	(VMCI_MAX_DATAGRAM_QUEUE_SIZE +					\
	 1024 * (sizeof(struct vmci_datagram) +				\
		 sizeof(struct vmci_event_data_max)))
596 
597 /*
598  * Struct used for querying, via VMCI_RESOURCES_QUERY, the availability of
599  * hypervisor resources.  Struct size is 16 bytes. All fields in struct are
600  * aligned to their natural alignment.
601  */
602 struct vmci_resource_query_hdr {
603 	struct vmci_datagram hdr;
604 	u32 num_resources;
605 	u32 _padding;
606 };
607 
608 /*
609  * Convenience struct for negotiating vectors. Must match layout of
610  * VMCIResourceQueryHdr minus the struct vmci_datagram header.
611  */
612 struct vmci_resource_query_msg {
613 	u32 num_resources;
614 	u32 _padding;
615 	u32 resources[1];
616 };
617 
/*
 * A single VMCI_RESOURCES_QUERY can cover at most 31 resources: the
 * answer is encoded in the low 31 bits of a positive return value,
 * and negative values are reserved for errors.
 */
#define VMCI_RESOURCE_QUERY_MAX_NUM 31

/* Largest VMCI_RESOURCES_QUERY request: header plus 31 resource IDs. */
#define VMCI_RESOURCE_QUERY_MAX_SIZE				\
	(sizeof(struct vmci_resource_query_hdr) +		\
	 sizeof(u32) * VMCI_RESOURCE_QUERY_MAX_NUM)
630 
631 /*
632  * Struct used for setting the notification bitmap.  All fields in
633  * struct are aligned to their natural alignment.
634  */
635 struct vmci_notify_bm_set_msg {
636 	struct vmci_datagram hdr;
637 	union {
638 		u32 bitmap_ppn32;
639 		u64 bitmap_ppn64;
640 	};
641 };
642 
643 /*
644  * Struct used for linking a doorbell handle with an index in the
645  * notify bitmap. All fields in struct are aligned to their natural
646  * alignment.
647  */
648 struct vmci_doorbell_link_msg {
649 	struct vmci_datagram hdr;
650 	struct vmci_handle handle;
651 	u64 notify_idx;
652 };
653 
654 /*
655  * Struct used for unlinking a doorbell handle from an index in the
656  * notify bitmap. All fields in struct are aligned to their natural
657  * alignment.
658  */
659 struct vmci_doorbell_unlink_msg {
660 	struct vmci_datagram hdr;
661 	struct vmci_handle handle;
662 };
663 
664 /*
665  * Struct used for generating a notification on a doorbell handle. All
666  * fields in struct are aligned to their natural alignment.
667  */
668 struct vmci_doorbell_notify_msg {
669 	struct vmci_datagram hdr;
670 	struct vmci_handle handle;
671 };
672 
673 /*
674  * This struct is used to contain data for events.  Size of this struct is a
675  * multiple of 8 bytes, and all fields are aligned to their natural alignment.
676  */
677 struct vmci_event_data {
678 	u32 event;		/* 4 bytes. */
679 	u32 _pad;
680 	/* Event payload is put here. */
681 };
682 
683 /*
684  * Define the different VMCI_EVENT payload data types here.  All structs must
685  * be a multiple of 8 bytes, and fields must be aligned to their natural
686  * alignment.
687  */
688 struct vmci_event_payld_ctx {
689 	u32 context_id;	/* 4 bytes. */
690 	u32 _pad;
691 };
692 
693 struct vmci_event_payld_qp {
694 	struct vmci_handle handle;  /* queue_pair handle. */
695 	u32 peer_id;	    /* Context id of attaching/detaching VM. */
696 	u32 _pad;
697 };
698 
699 /*
700  * We define the following struct to get the size of the maximum event
701  * data the hypervisor may send to the guest.  If adding a new event
702  * payload type above, add it to the following struct too (inside the
703  * union).
704  */
705 struct vmci_event_data_max {
706 	struct vmci_event_data event_data;
707 	union {
708 		struct vmci_event_payld_ctx context_payload;
709 		struct vmci_event_payld_qp qp_payload;
710 	} ev_data_payload;
711 };
712 
713 /*
714  * Struct used for VMCI_EVENT_SUBSCRIBE/UNSUBSCRIBE and
715  * VMCI_EVENT_HANDLER messages.  Struct size is 32 bytes.  All fields
716  * in struct are aligned to their natural alignment.
717  */
718 struct vmci_event_msg {
719 	struct vmci_datagram hdr;
720 
721 	/* Has event type and payload. */
722 	struct vmci_event_data event_data;
723 
724 	/* Payload gets put here. */
725 };
726 
727 /* Event with context payload. */
728 struct vmci_event_ctx {
729 	struct vmci_event_msg msg;
730 	struct vmci_event_payld_ctx payload;
731 };
732 
733 /* Event with QP payload. */
734 struct vmci_event_qp {
735 	struct vmci_event_msg msg;
736 	struct vmci_event_payld_qp payload;
737 };
738 
739 /*
740  * Structs used for queue_pair alloc and detach messages.  We align fields of
741  * these structs to 64bit boundaries.
742  */
743 struct vmci_qp_alloc_msg {
744 	struct vmci_datagram hdr;
745 	struct vmci_handle handle;
746 	u32 peer;
747 	u32 flags;
748 	u64 produce_size;
749 	u64 consume_size;
750 	u64 num_ppns;
751 
752 	/* List of PPNs placed here. */
753 };
754 
755 struct vmci_qp_detach_msg {
756 	struct vmci_datagram hdr;
757 	struct vmci_handle handle;
758 };
759 
760 /* VMCI Doorbell API. */
761 #define VMCI_FLAG_DELAYED_CB BIT(0)
762 
763 typedef void (*vmci_callback) (void *client_data);
764 
765 /*
766  * struct vmci_qp - A vmw_vmci queue pair handle.
767  *
768  * This structure is used as a handle to a queue pair created by
769  * VMCI.  It is intentionally left opaque to clients.
770  */
771 struct vmci_qp;
772 
773 /* Callback needed for correctly waiting on events. */
774 typedef int (*vmci_datagram_recv_cb) (void *client_data,
775 				      struct vmci_datagram *msg);
776 
777 /* VMCI Event API. */
778 typedef void (*vmci_event_cb) (u32 sub_id, const struct vmci_event_data *ed,
779 			       void *client_data);
780 
781 /*
782  * We use the following inline function to access the payload data
783  * associated with an event data.
784  */
785 static inline const void *
vmci_event_data_const_payload(const struct vmci_event_data * ev_data)786 vmci_event_data_const_payload(const struct vmci_event_data *ev_data)
787 {
788 	return (const char *)ev_data + sizeof(*ev_data);
789 }
790 
/* Writable counterpart of vmci_event_data_const_payload(). */
static inline void *vmci_event_data_payload(struct vmci_event_data *ev_data)
{
	const void *payload = vmci_event_data_const_payload(ev_data);

	return (void *)payload;
}
795 
796 /*
797  * Helper to read a value from a head or tail pointer. For X86_32, the
798  * pointer is treated as a 32bit value, since the pointer value
799  * never exceeds a 32bit value in this case. Also, doing an
800  * atomic64_read on X86_32 uniprocessor systems may be implemented
801  * as a non locked cmpxchg8b, that may end up overwriting updates done
802  * by the VMCI device to the memory location. On 32bit SMP, the lock
803  * prefix will be used, so correctness isn't an issue, but using a
804  * 64bit operation still adds unnecessary overhead.
805  */
vmci_q_read_pointer(u64 * var)806 static inline u64 vmci_q_read_pointer(u64 *var)
807 {
808 	return READ_ONCE(*(unsigned long *)var);
809 }
810 
811 /*
812  * Helper to set the value of a head or tail pointer. For X86_32, the
813  * pointer is treated as a 32bit value, since the pointer value
814  * never exceeds a 32bit value in this case. On 32bit SMP, using a
815  * locked cmpxchg8b adds unnecessary overhead.
816  */
vmci_q_set_pointer(u64 * var,u64 new_val)817 static inline void vmci_q_set_pointer(u64 *var, u64 new_val)
818 {
819 	/* XXX buggered on big-endian */
820 	WRITE_ONCE(*(unsigned long *)var, (unsigned long)new_val);
821 }
822 
823 /*
824  * Helper to add a given offset to a head or tail pointer. Wraps the
825  * value of the pointer around the max size of the queue.
826  */
vmci_qp_add_pointer(u64 * var,size_t add,u64 size)827 static inline void vmci_qp_add_pointer(u64 *var, size_t add, u64 size)
828 {
829 	u64 new_val = vmci_q_read_pointer(var);
830 
831 	if (new_val >= size - add)
832 		new_val -= size;
833 
834 	new_val += add;
835 
836 	vmci_q_set_pointer(var, new_val);
837 }
838 
839 /*
840  * Helper routine to get the Producer Tail from the supplied queue.
841  */
842 static inline u64
vmci_q_header_producer_tail(const struct vmci_queue_header * q_header)843 vmci_q_header_producer_tail(const struct vmci_queue_header *q_header)
844 {
845 	struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
846 	return vmci_q_read_pointer(&qh->producer_tail);
847 }
848 
849 /*
850  * Helper routine to get the Consumer Head from the supplied queue.
851  */
852 static inline u64
vmci_q_header_consumer_head(const struct vmci_queue_header * q_header)853 vmci_q_header_consumer_head(const struct vmci_queue_header *q_header)
854 {
855 	struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
856 	return vmci_q_read_pointer(&qh->consumer_head);
857 }
858 
859 /*
860  * Helper routine to increment the Producer Tail.  Fundamentally,
861  * vmci_qp_add_pointer() is used to manipulate the tail itself.
862  */
863 static inline void
vmci_q_header_add_producer_tail(struct vmci_queue_header * q_header,size_t add,u64 queue_size)864 vmci_q_header_add_producer_tail(struct vmci_queue_header *q_header,
865 				size_t add,
866 				u64 queue_size)
867 {
868 	vmci_qp_add_pointer(&q_header->producer_tail, add, queue_size);
869 }
870 
871 /*
872  * Helper routine to increment the Consumer Head.  Fundamentally,
873  * vmci_qp_add_pointer() is used to manipulate the head itself.
874  */
875 static inline void
vmci_q_header_add_consumer_head(struct vmci_queue_header * q_header,size_t add,u64 queue_size)876 vmci_q_header_add_consumer_head(struct vmci_queue_header *q_header,
877 				size_t add,
878 				u64 queue_size)
879 {
880 	vmci_qp_add_pointer(&q_header->consumer_head, add, queue_size);
881 }
882 
883 /*
884  * Helper routine for getting the head and the tail pointer for a queue.
885  * Both the VMCIQueues are needed to get both the pointers for one queue.
886  */
887 static inline void
vmci_q_header_get_pointers(const struct vmci_queue_header * produce_q_header,const struct vmci_queue_header * consume_q_header,u64 * producer_tail,u64 * consumer_head)888 vmci_q_header_get_pointers(const struct vmci_queue_header *produce_q_header,
889 			   const struct vmci_queue_header *consume_q_header,
890 			   u64 *producer_tail,
891 			   u64 *consumer_head)
892 {
893 	if (producer_tail)
894 		*producer_tail = vmci_q_header_producer_tail(produce_q_header);
895 
896 	if (consumer_head)
897 		*consumer_head = vmci_q_header_consumer_head(consume_q_header);
898 }
899 
vmci_q_header_init(struct vmci_queue_header * q_header,const struct vmci_handle handle)900 static inline void vmci_q_header_init(struct vmci_queue_header *q_header,
901 				      const struct vmci_handle handle)
902 {
903 	q_header->handle = handle;
904 	q_header->producer_tail = 0;
905 	q_header->consumer_head = 0;
906 }
907 
908 /*
909  * Finds available free space in a produce queue to enqueue more
910  * data or reports an error if queue pair corruption is detected.
911  */
912 static s64
vmci_q_header_free_space(const struct vmci_queue_header * produce_q_header,const struct vmci_queue_header * consume_q_header,const u64 produce_q_size)913 vmci_q_header_free_space(const struct vmci_queue_header *produce_q_header,
914 			 const struct vmci_queue_header *consume_q_header,
915 			 const u64 produce_q_size)
916 {
917 	u64 tail;
918 	u64 head;
919 	u64 free_space;
920 
921 	tail = vmci_q_header_producer_tail(produce_q_header);
922 	head = vmci_q_header_consumer_head(consume_q_header);
923 
924 	if (tail >= produce_q_size || head >= produce_q_size)
925 		return VMCI_ERROR_INVALID_SIZE;
926 
927 	/*
928 	 * Deduct 1 to avoid tail becoming equal to head which causes
929 	 * ambiguity. If head and tail are equal it means that the
930 	 * queue is empty.
931 	 */
932 	if (tail >= head)
933 		free_space = produce_q_size - (tail - head) - 1;
934 	else
935 		free_space = head - tail - 1;
936 
937 	return free_space;
938 }
939 
940 /*
941  * vmci_q_header_free_space() does all the heavy lifting of
942  * determing the number of free bytes in a Queue.  This routine,
943  * then subtracts that size from the full size of the Queue so
944  * the caller knows how many bytes are ready to be dequeued.
945  * Results:
946  * On success, available data size in bytes (up to MAX_INT64).
947  * On failure, appropriate error code.
948  */
949 static inline s64
vmci_q_header_buf_ready(const struct vmci_queue_header * consume_q_header,const struct vmci_queue_header * produce_q_header,const u64 consume_q_size)950 vmci_q_header_buf_ready(const struct vmci_queue_header *consume_q_header,
951 			const struct vmci_queue_header *produce_q_header,
952 			const u64 consume_q_size)
953 {
954 	s64 free_space;
955 
956 	free_space = vmci_q_header_free_space(consume_q_header,
957 					      produce_q_header, consume_q_size);
958 	if (free_space < VMCI_SUCCESS)
959 		return free_space;
960 
961 	return consume_q_size - free_space - 1;
962 }
963 
964 
965 #endif /* _VMW_VMCI_DEF_H_ */
966