xref: /illumos-gate/usr/src/uts/common/io/virtio/virtio.h (revision 85f4cb87104c72587029a6e0f1663332c85ba118)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Joyent, Inc.
14  */
15 
16 #ifndef _VIRTIO_H
17 #define	_VIRTIO_H
18 
19 /*
20  * VIRTIO FRAMEWORK
21  *
22  * This framework handles the initialisation and operation common to all Virtio
23  * device types; e.g., Virtio Block (vioblk), Virtio Network (vioif), etc.  The
24  * framework presently provides for what is now described as a "legacy" driver
25  * in the current issue of the "Virtual I/O Device (VIRTIO) Version 1.1"
26  * specification.  Though several new specifications have been released, legacy
27  * devices are still the most widely available on current hypervisor platforms.
28  * Legacy devices make use of the native byte order of the host system.
29  *
30  * FRAMEWORK INITIALISATION: STARTING
31  *
32  * Client drivers will, in their attach(9E) routine, make an early call to
33  * virtio_init().  This causes the framework to allocate some base resources
34  * and begin initialising the device.  This routine confirms that the device
35  * will operate in the supported legacy mode as per the specification.  A
36  * failure here means that we cannot presently support this device.
37  *
38  * Once virtio_init() returns, the initialisation phase has begun and the
39  * driver can examine negotiated features and set up virtqueues.  The
40  * initialisation phase ends when the driver calls either
41  * virtio_init_complete() or virtio_fini().
42  *
43  * FRAMEWORK INITIALISATION: FEATURE NEGOTIATION
44  *
45  * The virtio_init() call accepts a bitmask of desired features that the driver
46  * supports.  The framework will negotiate the common set of features supported
47  * by both the driver and the device.  The presence of any individual feature
48  * can be tested after the initialisation phase has begun using
49  * virtio_feature_present().
50  *
51  * The framework will additionally negotiate some set of features that are not
52  * specific to a device type on behalf of the client driver; e.g., support for
53  * indirect descriptors.
54  *
55  * Some features allow the driver to read additional configuration values from
56  * the device-specific regions of the device register space.  These can be
57  * accessed via the virtio_dev_get*() and virtio_dev_put*() family of
58  * functions.
59  *
60  * FRAMEWORK INITIALISATION: VIRTQUEUE CONFIGURATION
61  *
62  * During the initialisation phase, the client driver may configure some number
63  * of virtqueues with virtio_queue_alloc().  Once initialisation has been
64  * completed, no further queues can be configured without destroying the
65  * framework object and beginning again from scratch.
66  *
67  * When configuring a queue, the driver must know the queue index number.  This
68  * generally comes from the section of the specification describing the
69  * specific device type; e.g., Virtio Network devices have a receive queue at
70  * index 0, and a transmit queue at index 1.  The name given to the queue is
71  * informational and has no impact on device operation.
72  *
73  * Most queues will require an interrupt handler function.  When a queue
74  * notification interrupt is received, the provided handler will be called with
75  * two arguments: first, the provided user data argument; and second, a pointer
76  * to the "virtio_t" object for this instance.
77  *
78  * A maximum segment count must be selected for each queue.  This count is the
79  * upper bound on the number of scatter-gather cookies that will be accepted,
80  * and applies to both direct and indirect descriptor based queues.  This cap
81  * is usually either negotiated with the device, or determined structurally
82  * based on the shape of the buffers required for device operation.
83  *
84  * FRAMEWORK INITIALISATION: FINISHING
85  *
86  * Once queue configuration has been completed, the client driver calls
87  * virtio_init_complete() to finalise resource allocation and set the device to
88  * the running state (DRIVER_OK).  The framework will allocate any interrupts
89  * needed for queue notifications at this time.
90  *
91  * If the client driver cannot complete initialisation, the instance may
92  * instead be torn down with virtio_fini().  Signalling failure to this routine
93  * will report failure to the device instead of resetting it, which may be
94  * reported by the hypervisor as a fault.
95  *
96  * DESCRIPTOR CHAINS
97  *
98  * Most devices accept I/O requests from the driver through at least one queue.
99  * Some devices are operated by submission of synchronous requests.  The device
100  * is expected to process the request and return some kind of status; e.g., a
101  * block device accepts write requests from the file system and signals when
102  * they have completed or failed.
103  *
104  * Other devices operate by asynchronous delivery of I/O requests to the
105  * driver; e.g., a network device may receive incoming frames at any time.
106  * Inbound asynchronous delivery is usually achieved by populating a queue with
107  * a series of memory buffers where the incoming data will be written by the
108  * device at some later time.
109  *
110  * Whether for inbound or outbound transfers, buffers are inserted into the
111  * ring through chains of one or more descriptors.  Each descriptor has a
112  * transfer direction (to or from the device), and a physical address and
113  * length (i.e., a DMA cookie).  The framework automatically manages the slight
114  * differences in operation between direct and indirect descriptor usage on
115  * behalf of the client driver.
116  *
117  * A chain of descriptors is allocated by calling virtio_chain_alloc() against
118  * a particular queue.  This function accepts a kmem flag as per
119  * kmem_alloc(9F).  A client driver specific void pointer may be attached to
120  * the chain with virtio_chain_data_set() and read back later with
121  * virtio_chain_data(); e.g., after it is returned by a call to
122  * virtio_queue_poll().
123  *
124  * Cookies are added to a chain by calling virtio_chain_append() with the
125  * appropriate physical address and transfer direction.  This function may fail
126  * if the chain is already using the maximum number of cookies for this queue.
127  * Client drivers are responsible for appropriate use of virtio_dma_sync()
128  * or ddi_dma_sync(9F) on any memory appended to a descriptor chain prior to
129  * chain submission.
130  *
131  * Once fully constructed and synced, a chain can be submitted to the device by
132  * calling virtio_chain_submit().  The caller may choose to flush the queue
133  * contents to the device on each submission, or to batch notifications until
134  * later to amortise the notification cost over more requests.  If batching
135  * notifications, outstanding submissions can be flushed with a call to
136  * virtio_queue_flush().  Note that the framework will insert an appropriate
137  * memory barrier to ensure writes by the driver complete before making the
138  * submitted descriptor visible to the device.
139  *
140  * A chain may be reset for reuse with new cookies by calling
141  * virtio_chain_clear().  The chain may be freed completely by calling
142  * virtio_chain_free().
143  *
144  * When a descriptor chain is returned to the driver by the device, it may
145  * include a received data length value.  This value can be accessed via
146  * virtio_chain_received_length().  There is some suggestion in more recent
147  * Virtio specifications that, depending on the device type and the hypervisor,
148  * this value may not always be accurate or useful.
149  *
150  * VIRTQUEUE OPERATION
151  *
152  * The queue size (i.e., the number of direct descriptor entries) can be
153  * found with virtio_queue_size().  This value is static over the lifetime
154  * of the queue.
155  *
156  * The number of descriptor chains presently submitted to the device and not
157  * yet returned can be obtained via virtio_queue_nactive().
158  *
159  * Over time the device will return descriptor chains to the driver in response
160  * to device activity.  Any newly returned chains may be retrieved by the
161  * driver by calling virtio_queue_poll().  See the DESCRIPTOR CHAINS section
162  * for more detail about managing descriptor chain objects.  Note that the
163  * framework will insert an appropriate memory barrier to ensure that writes by
164  * the host are complete before returning the chain to the client driver.
165  *
166  * The NO_INTERRUPT flag on a queue may be set or cleared with
167  * virtio_queue_no_interrupt().  Note that this flag is purely advisory, and
168  * may not actually stop interrupts from the device in a timely fashion.
169  *
170  * INTERRUPT MANAGEMENT
171  *
172  * A mutex used within an interrupt handler must be initialised with the
173  * correct interrupt priority.  After the initialisation phase is complete, the
174  * client should use virtio_intr_pri() to get a value suitable to pass to
175  * mutex_init(9F).
176  *
177  * When the driver is ready to receive notifications from the device, the
178  * virtio_interrupts_enable() routine may be called.  Interrupts may be
179  * disabled again by calling virtio_interrupts_disable().  Interrupt resources
180  * will be deallocated as part of a subsequent call to virtio_fini().
181  *
182  * DMA MEMORY MANAGEMENT: ALLOCATION AND FREE
183  *
184  * Client drivers may allocate memory suitable for communication with the
185  * device by using virtio_dma_alloc().  This function accepts an allocation
186  * size, a DMA attribute template, a set of DMA flags, and a kmem flag.
187  * A "virtio_dma_t" object is returned to track and manage the allocation.
188  *
189  * The DMA flags value will be a combination of direction flags (e.g.,
190  * DDI_DMA_READ or DDI_DMA_WRITE) and mapping flags (e.g., DDI_DMA_CONSISTENT
191  * or DDI_DMA_STREAMING).  The kmem flag is either KM_SLEEP or KM_NOSLEEP,
192  * as described in kmem_alloc(9F).
193  *
194  * Memory that is no longer required can be freed using virtio_dma_free().
195  *
196  * DMA MEMORY MANAGEMENT: BINDING WITHOUT ALLOCATION
197  *
198  * If another subsystem has loaned memory to your client driver, you may need
199  * to allocate and bind a handle without additional backing memory.  The
200  * virtio_dma_alloc_nomem() function can be used for this purpose, returning a
201  * "virtio_dma_t" object.
202  *
203  * Once allocated, an arbitrary kernel memory location can be bound for DMA
204  * with virtio_dma_bind().  The binding can be subsequently undone with
205  * virtio_dma_unbind(), allowing the "virtio_dma_t" object to be reused for
206  * another binding.
207  *
208  * DMA MEMORY MANAGEMENT: VIRTUAL AND PHYSICAL ADDRESSES
209  *
210  * The total size of a mapping (with or without its own backing memory) can be
211  * found with virtio_dma_size().  A void pointer to a kernel virtual address
212  * within the buffer can be obtained via virtio_dma_va(); this function accepts
213  * a linear offset into the VA range and performs bounds checking.
214  *
215  * The number of physical memory addresses (DMA cookies) can be found with
216  * virtio_dma_ncookies().  The physical address and length of each cookie can
217  * be found with virtio_dma_cookie_pa() and virtio_dma_cookie_size(); these
218  * functions are keyed on the zero-indexed cookie number.
219  *
220  * DMA MEMORY MANAGEMENT: SYNCHRONISATION
221  *
222  * When passing memory to the device, or reading memory returned from the
223  * device, DMA synchronisation must be performed in case it is required by the
224  * underlying platform.  A convenience wrapper exists: virtio_dma_sync().  This
225  * routine synchronises the entire binding and accepts the same synchronisation
226  * type values as ddi_dma_sync(9F).
227  *
228  * QUIESCE
229  *
230  * As quiesce(9E) merely requires that the device come to a complete stop, most
231  * client drivers will be able to call virtio_quiesce() without additional
232  * actions.  This will reset the device, immediately halting all queue
233  * activity, and return a value suitable for returning from the client driver
234  * quiesce(9E) entrypoint.  This routine must only be called from quiesce
235  * context as it performs no synchronisation with other threads.
236  *
237  * DETACH
238  *
239  * Some devices are effectively long-polled; that is, they submit some number
240  * of descriptor chains to the device that are not returned to the driver until
241  * some asynchronous event occurs such as the receipt of an incoming packet or
242  * a device hot plug event.  When detaching the device the return of these
243  * outstanding buffers must be arranged.  Some device types may have task
244  * management commands that can force the orderly return of these chains, but
245  * the only way to do so uniformly is to reset the device and claw back the
246  * memory.
247  *
248  * If the client driver has outstanding descriptors and needs a hard stop on
249  * device activity it can call virtio_shutdown().  This routine will bring
250  * queue processing to an orderly stop and then reset the device, causing it to
251  * cease use of any DMA resources.  Once this function returns, the driver may
252  * call virtio_queue_evacuate() on each queue to retrieve any previously
253  * submitted chains.
254  *
255  * To tear down resources (e.g., interrupts and allocated memory) the client
256  * driver must finally call virtio_fini().  If virtio_shutdown() was not
257  * needed, this routine will also reset the device.
258  */
259 
260 #ifdef __cplusplus
261 extern "C" {
262 #endif
263 
/*
 * Opaque framework object types.  Client drivers receive and pass pointers
 * to these; the structure contents are private to the framework
 * implementation.
 */
264 typedef struct virtio virtio_t;
265 typedef struct virtio_queue virtio_queue_t;
266 typedef struct virtio_chain virtio_chain_t;
267 typedef struct virtio_dma virtio_dma_t;
268 
typedef enum virtio_direction {
	/*
	 * Legacy descriptors are unidirectional: each one is marked either
	 * for the device to write into, or for the device to read from,
	 * never both at once.
	 */
	VIRTIO_DIR_DEVICE_WRITES = 1,
	VIRTIO_DIR_DEVICE_READS = 2
} virtio_direction_t;
277 
/*
 * Framework lifecycle (see "FRAMEWORK INITIALISATION" and "DETACH" above):
 *
 * virtio_init() begins the initialisation phase and negotiates the passed
 * feature bitmask with the device.  NOTE(review): the trailing boolean_t
 * argument is not described in this header; confirm its meaning against the
 * implementation.
 *
 * virtio_init_complete() ends the initialisation phase, finalises resource
 * allocation, and sets the device running (DRIVER_OK).  NOTE(review): the
 * int argument is not described in this header; confirm against the
 * implementation.
 *
 * virtio_fini() tears down resources; passing B_TRUE for the boolean_t
 * signals failure, reporting failure to the device instead of resetting it.
 *
 * virtio_shutdown() brings queue processing to an orderly stop and resets
 * the device so it ceases use of DMA resources; follow with
 * virtio_queue_evacuate() to reclaim outstanding chains.
 *
 * virtio_quiesce() resets the device and returns a value suitable for the
 * quiesce(9E) entrypoint; it must only be called from quiesce context as it
 * performs no synchronisation with other threads.
 */
278 void virtio_fini(virtio_t *, boolean_t);
279 virtio_t *virtio_init(dev_info_t *, uint64_t, boolean_t);
280 int virtio_init_complete(virtio_t *, int);
281 int virtio_quiesce(virtio_t *);
282 void virtio_shutdown(virtio_t *);
283 
/*
 * Returns a value suitable for passing to mutex_init(9F) as the interrupt
 * priority; valid after the initialisation phase is complete (see
 * "INTERRUPT MANAGEMENT" above).
 */
284 void *virtio_intr_pri(virtio_t *);
285 
/* Device reset; not otherwise described in this header. */
286 void virtio_device_reset(virtio_t *);
287 
/*
 * Read and write configuration values in the device-specific region of the
 * device register space (see "FEATURE NEGOTIATION" above).  The uintptr_t
 * argument is the register offset.  NOTE(review): there is no 64-bit put
 * routine; presumably legacy devices do not require one -- confirm against
 * the implementation.
 */
288 uint8_t virtio_dev_get8(virtio_t *, uintptr_t);
289 uint16_t virtio_dev_get16(virtio_t *, uintptr_t);
290 uint32_t virtio_dev_get32(virtio_t *, uintptr_t);
291 uint64_t virtio_dev_get64(virtio_t *, uintptr_t);
292 
293 void virtio_dev_put8(virtio_t *, uintptr_t, uint8_t);
294 void virtio_dev_put16(virtio_t *, uintptr_t, uint16_t);
295 void virtio_dev_put32(virtio_t *, uintptr_t, uint32_t);
296 
/*
 * Test whether an individual feature was negotiated with the device; may be
 * used once the initialisation phase has begun.
 */
297 boolean_t virtio_feature_present(virtio_t *, uint64_t);
298 
/*
 * Configure a virtqueue during the initialisation phase (see "VIRTQUEUE
 * CONFIGURATION" above).  Arguments: the device-type-specific queue index;
 * an informational queue name; an optional interrupt handler and its user
 * data argument (the handler also receives the virtio_t pointer); a
 * boolean_t whose meaning is not described in this header (NOTE(review):
 * confirm against the implementation); and the maximum per-chain segment
 * (cookie) count.
 */
299 virtio_queue_t *virtio_queue_alloc(virtio_t *, uint16_t, const char *,
300     ddi_intr_handler_t *, void *, boolean_t, uint_t);
301 
/*
 * Queue operation (see "VIRTQUEUE OPERATION" above):
 *
 * virtio_queue_poll()		retrieve a chain newly returned by the device
 * virtio_queue_evacuate()	reclaim previously submitted chains after
 *				virtio_shutdown()
 * virtio_queue_flush()		notify the device of batched submissions
 * virtio_queue_no_interrupt()	set/clear the advisory NO_INTERRUPT flag
 * virtio_queue_nactive()	chains submitted but not yet returned
 * virtio_queue_size()		queue size; static over the queue lifetime
 */
302 virtio_chain_t *virtio_queue_poll(virtio_queue_t *);
303 virtio_chain_t *virtio_queue_evacuate(virtio_queue_t *);
304 void virtio_queue_flush(virtio_queue_t *);
305 void virtio_queue_no_interrupt(virtio_queue_t *, boolean_t);
306 uint_t virtio_queue_nactive(virtio_queue_t *);
307 uint_t virtio_queue_size(virtio_queue_t *);
308 
/*
 * Descriptor chain management (see "DESCRIPTOR CHAINS" above).
 * virtio_chain_alloc() takes a kmem flag as per kmem_alloc(9F).
 * virtio_chain_append() appends a DMA cookie (physical address, length, and
 * transfer direction) and may fail if the chain already holds the queue's
 * maximum cookie count.  virtio_chain_clear() resets a chain for reuse with
 * new cookies.
 */
309 virtio_chain_t *virtio_chain_alloc(virtio_queue_t *, int);
310 void virtio_chain_clear(virtio_chain_t *);
311 void virtio_chain_free(virtio_chain_t *);
312 int virtio_chain_append(virtio_chain_t *, uint64_t, size_t, virtio_direction_t);
313 
/* Attach and read back a client-driver-specific pointer on a chain. */
314 void *virtio_chain_data(virtio_chain_t *);
315 void virtio_chain_data_set(virtio_chain_t *, void *);
316 
/*
 * Submit a fully constructed (and synced) chain; the boolean_t selects
 * whether the queue contents are flushed to the device on this submission
 * or batched for a later virtio_queue_flush().
 * virtio_chain_received_length() reports the received data length on a
 * returned chain; per the specification, this value may not always be
 * accurate or useful (see above).
 */
317 void virtio_chain_submit(virtio_chain_t *, boolean_t);
318 size_t virtio_chain_received_length(virtio_chain_t *);
319 
/*
 * Enable or disable delivery of queue notification interrupts (see
 * "INTERRUPT MANAGEMENT" above); resources are deallocated by a later
 * virtio_fini().
 */
320 int virtio_interrupts_enable(virtio_t *);
321 void virtio_interrupts_disable(virtio_t *);
322 
/*
 * DMA memory management (see "DMA MEMORY MANAGEMENT" above).
 * virtio_dma_alloc() takes an allocation size, a DMA attribute template,
 * DMA flags (direction and mapping type), and a kmem flag.
 * virtio_dma_alloc_nomem() allocates a handle without backing memory, for
 * binding loaned memory via virtio_dma_bind()/virtio_dma_unbind().
 * virtio_dma_sync() synchronises the entire binding and accepts the same
 * synchronisation type values as ddi_dma_sync(9F).
 */
323 virtio_dma_t *virtio_dma_alloc(virtio_t *, size_t, const ddi_dma_attr_t *, int,
324     int);
325 virtio_dma_t *virtio_dma_alloc_nomem(virtio_t *, const ddi_dma_attr_t *, int);
326 void virtio_dma_free(virtio_dma_t *);
327 int virtio_dma_bind(virtio_dma_t *, void *, size_t, int, int);
328 void virtio_dma_unbind(virtio_dma_t *);
329 void virtio_dma_sync(virtio_dma_t *, int);
330 
/*
 * Introspection: virtio_dma_va() bounds-checks a linear offset and returns
 * a kernel virtual address within the buffer; the cookie routines are keyed
 * on the zero-indexed cookie number.
 */
331 void *virtio_dma_va(virtio_dma_t *, size_t);
332 size_t virtio_dma_size(virtio_dma_t *);
333 uint_t virtio_dma_ncookies(virtio_dma_t *);
334 uint64_t virtio_dma_cookie_pa(virtio_dma_t *, uint_t);
335 size_t virtio_dma_cookie_size(virtio_dma_t *, uint_t);
336 
337 
338 #ifdef __cplusplus
339 }
340 #endif
341 
342 #endif /* _VIRTIO_H */
343