xref: /illumos-gate/usr/src/uts/common/io/virtio/virtio.h (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Joyent, Inc.
14  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
15  */
16 
17 #ifndef _VIRTIO_H
18 #define	_VIRTIO_H
19 
20 /*
21  * VIRTIO FRAMEWORK
22  *
23  * This framework handles the initialisation and operation common to all Virtio
24  * device types; e.g., Virtio Block (vioblk), Virtio Network (vioif), etc.  The
25  * framework presently provides for what is now described as a "legacy" driver
26  * in the current issue of the "Virtual I/O Device (VIRTIO) Version 1.1"
27  * specification.  Though several new specifications have been released, legacy
28  * devices are still the most widely available on current hypervisor platforms.
29  * Legacy devices make use of the native byte order of the host system.
30  *
31  * FRAMEWORK INITIALISATION: STARTING
32  *
33  * Client drivers will, in their attach(9E) routine, make an early call to
34  * virtio_init().  This causes the framework to allocate some base resources
35  * and begin initialising the device.  This routine confirms that the device
36  * will operate in the supported legacy mode as per the specification.  A
37  * failure here means that we cannot presently support this device.
38  *
39  * Once virtio_init() returns, the initialisation phase has begun and the
40  * driver can examine negotiated features and set up virtqueues.  The
41  * initialisation phase ends when the driver calls either
42  * virtio_init_complete() or virtio_fini().
43  *
44  * FRAMEWORK INITIALISATION: FEATURE NEGOTIATION
45  *
46  * The virtio_init() call accepts a bitmask of desired features that the driver
47  * supports.  The framework will negotiate the common set of features supported
48  * by both the driver and the device.  The presence of any individual feature
49  * can be tested after the initialisation phase has begun using
50  * virtio_feature_present().
51  *
52  * The framework will additionally negotiate some set of features that are not
53  * specific to a device type on behalf of the client driver; e.g., support for
54  * indirect descriptors.
55  *
56  * Some features allow the driver to read additional configuration values from
57  * the device-specific regions of the device register space.  These can be
58  * accessed via the virtio_dev_get*() and virtio_dev_put*() family of
59  * functions.
60  *
61  * FRAMEWORK INITIALISATION: VIRTQUEUE CONFIGURATION
62  *
63  * During the initialisation phase, the client driver may configure some number
64  * of virtqueues with virtio_queue_alloc().  Once initialisation has been
65  * completed, no further queues can be configured without destroying the
66  * framework object and beginning again from scratch.
67  *
68  * When configuring a queue, the driver must know the queue index number.  This
69  * generally comes from the section of the specification describing the
70  * specific device type; e.g., Virtio Network devices have a receive queue at
71  * index 0, and a transmit queue at index 1.  The name given to the queue is
72  * informational and has no impact on device operation.
73  *
74  * Most queues will require an interrupt handler function.  When a queue
75  * notification interrupt is received, the provided handler will be called with
76  * two arguments: first, the provided user data argument; and second, a pointer
77  * to the "virtio_t" object for this instance.
78  *
79  * A maximum segment count must be selected for each queue.  This count is the
80  * upper bound on the number of scatter-gather cookies that will be accepted,
81  * and applies to both direct and indirect descriptor based queues.  This cap
82  * is usually either negotiated with the device, or determined structurally
83  * based on the shape of the buffers required for device operation.
84  *
85  * FRAMEWORK INITIALISATION: CONFIGURATION SPACE CHANGE HANDLER
86  *
87  * During the initialisation phase, the client driver may register a handler
88  * function for receiving device configuration space change events.  Once
89  * initialisation has been completed, this cannot be changed without destroying
90  * the framework object and beginning again from scratch.
91  *
92  * When a configuration space change interrupt is received, the provided
93  * handler will be called with two arguments: first, the provided user data
94  * argument; and second, a pointer to the "virtio_t" object for this instance.
95  * The handler is called in an interrupt context.
96  *
97  * FRAMEWORK INITIALISATION: FINISHING
98  *
99  * Once queue configuration has been completed, the client driver calls
100  * virtio_init_complete() to finalise resource allocation and set the device to
101  * the running state (DRIVER_OK).  The framework will allocate any interrupts
102  * needed for queue notifications at this time.
103  *
104  * If the client driver cannot complete initialisation, the instance may
105  * instead be torn down with virtio_fini().  Signalling failure to this routine
106  * will report failure to the device instead of resetting it, which may be
107  * reported by the hypervisor as a fault.
108  *
109  * DESCRIPTOR CHAINS
110  *
111  * Most devices accept I/O requests from the driver through at least one queue.
112  * Some devices are operated by submission of synchronous requests.  The device
113  * is expected to process the request and return some kind of status; e.g., a
114  * block device accepts write requests from the file system and signals when
115  * they have completed or failed.
116  *
117  * Other devices operate by asynchronous delivery of I/O requests to the
118  * driver; e.g., a network device may receive incoming frames at any time.
119  * Inbound asynchronous delivery is usually achieved by populating a queue with
120  * a series of memory buffers where the incoming data will be written by the
121  * device at some later time.
122  *
123  * Whether for inbound or outbound transfers, buffers are inserted into the
124  * ring through chains of one or more descriptors.  Each descriptor has a
125  * transfer direction (to or from the device), and a physical address and
126  * length (i.e., a DMA cookie).  The framework automatically manages the slight
127  * differences in operation between direct and indirect descriptor usage on
128  * behalf of the client driver.
129  *
130  * A chain of descriptors is allocated by calling virtio_chain_alloc() against
131  * a particular queue.  This function accepts a kmem flag as per
132  * kmem_alloc(9F).  A client driver specific void pointer may be attached to
133  * the chain with virtio_chain_data_set() and read back later with
134  * virtio_chain_data(); e.g., after it is returned by a call to
135  * virtio_queue_poll().
136  *
137  * Cookies are added to a chain by calling virtio_chain_append() with the
138  * appropriate physical address and transfer direction.  This function may fail
139  * if the chain is already using the maximum number of cookies for this queue.
140  * Client drivers are responsible for appropriate use of virtio_dma_sync()
141  * or ddi_dma_sync(9F) on any memory appended to a descriptor chain prior to
142  * chain submission.
143  *
144  * Once fully constructed and synced, a chain can be submitted to the device by
145  * calling virtio_chain_submit().  The caller may choose to flush the queue
146  * contents to the device on each submission, or to batch notifications until
147  * later to amortise the notification cost over more requests.  If batching
148  * notifications, outstanding submissions can be flushed with a call to
149  * virtio_queue_flush().  Note that the framework will insert an appropriate
150  * memory barrier to ensure writes by the driver complete before making the
151  * submitted descriptor visible to the device.
152  *
153  * A chain may be reset for reuse with new cookies by calling
154  * virtio_chain_clear().  The chain may be freed completely by calling
155  * virtio_chain_free().
156  *
157  * When a descriptor chain is returned to the driver by the device, it may
158  * include a received data length value.  This value can be accessed via
159  * virtio_chain_received_length().  There is some suggestion in more recent
160  * Virtio specifications that, depending on the device type and the
161  * hypervisor, this value may not always be accurate or useful.
162  *
163  * VIRTQUEUE OPERATION
164  *
165  * The queue size (i.e., the number of direct descriptor entries) can be
166  * found with virtio_queue_size().  This value is static over the lifetime
167  * of the queue.
168  *
169  * The number of descriptor chains presently submitted to the device and not
170  * yet returned can be obtained via virtio_queue_nactive().
171  *
172  * Over time the device will return descriptor chains to the driver in response
173  * to device activity.  Any newly returned chains may be retrieved by the
174  * driver by calling virtio_queue_poll().  See the DESCRIPTOR CHAINS section
175  * for more detail about managing descriptor chain objects.  Note that the
176  * framework will insert an appropriate memory barrier to ensure that writes by
177  * the host are complete before returning the chain to the client driver.
178  *
179  * The NO_INTERRUPT flag on a queue may be set or cleared with
180  * virtio_queue_no_interrupt().  Note that this flag is purely advisory, and
181  * may not actually stop interrupts from the device in a timely fashion.
182  *
183  * INTERRUPT MANAGEMENT
184  *
185  * A mutex used within an interrupt handler must be initialised with the
186  * correct interrupt priority.  After the initialisation phase is complete, the
187  * client should use virtio_intr_pri() to get a value suitable to pass to
188  * mutex_init(9F).
189  *
190  * When the driver is ready to receive notifications from the device, the
191  * virtio_interrupts_enable() routine may be called.  Interrupts may be
192  * disabled again by calling virtio_interrupts_disable().  Interrupt resources
193  * will be deallocated as part of a subsequent call to virtio_fini().
194  *
195  * DMA MEMORY MANAGEMENT: ALLOCATION AND FREE
196  *
197  * Client drivers may allocate memory suitable for communication with the
198  * device by using virtio_dma_alloc().  This function accepts an allocation
199  * size, a DMA attribute template, a set of DMA flags, and a kmem flag.
200  * A "virtio_dma_t" object is returned to track and manage the allocation.
201  *
202  * The DMA flags value will be a combination of direction flags (e.g.,
203  * DDI_DMA_READ or DDI_DMA_WRITE) and mapping flags (e.g., DDI_DMA_CONSISTENT
204  * or DDI_DMA_STREAMING).  The kmem flag is either KM_SLEEP or KM_NOSLEEP,
205  * as described in kmem_alloc(9F).
206  *
207  * Memory that is no longer required can be freed using virtio_dma_free().
208  *
209  * DMA MEMORY MANAGEMENT: BINDING WITHOUT ALLOCATION
210  *
211  * If another subsystem has loaned memory to your client driver, you may need
212  * to allocate and bind a handle without additional backing memory.  The
213  * virtio_dma_alloc_nomem() function can be used for this purpose, returning a
214  * "virtio_dma_t" object.
215  *
216  * Once allocated, an arbitrary kernel memory location can be bound for DMA
217  * with virtio_dma_bind().  The binding can be subsequently undone with
218  * virtio_dma_unbind(), allowing the "virtio_dma_t" object to be reused for
219  * another binding.
220  *
221  * DMA MEMORY MANAGEMENT: VIRTUAL AND PHYSICAL ADDRESSES
222  *
223  * The total size of a mapping (with or without own backing memory) can be
224  * found with virtio_dma_size().  A void pointer to a kernel virtual address
225  * within the buffer can be obtained via virtio_dma_va(); this function accepts
226  * a linear offset into the VA range and performs bounds checking.
227  *
228  * The number of physical memory addresses (DMA cookies) can be found with
229  * virtio_dma_ncookies().  The physical address and length of each cookie can
230  * be found with virtio_dma_cookie_pa() and virtio_dma_cookie_size(); these
231  * functions are keyed on the zero-indexed cookie number.
232  *
233  * DMA MEMORY MANAGEMENT: SYNCHRONISATION
234  *
235  * When passing memory to the device, or reading memory returned from the
236  * device, DMA synchronisation must be performed in case it is required by the
237  * underlying platform.  A convenience wrapper exists: virtio_dma_sync().  This
238  * routine synchronises the entire binding and accepts the same synchronisation
239  * type values as ddi_dma_sync(9F).
240  *
241  * QUIESCE
242  *
243  * As quiesce(9E) merely requires that the device come to a complete stop, most
244  * client drivers will be able to call virtio_quiesce() without additional
245  * actions.  This will reset the device, immediately halting all queue
246  * activity, and return a value suitable for returning from the client driver
247  * quiesce(9E) entrypoint.  This routine must only be called from quiesce
248  * context as it performs no synchronisation with other threads.
249  *
250  * DETACH
251  *
252  * Some devices are effectively long-polled; that is, they submit some number
253  * of descriptor chains to the device that are not returned to the driver until
254  * some asynchronous event occurs such as the receipt of an incoming packet or
255  * a device hot plug event.  When detaching the device the return of these
256  * outstanding buffers must be arranged.  Some device types may have task
257  * management commands that can force the orderly return of these chains, but
258  * the only way to do so uniformly is to reset the device and claw back the
259  * memory.
260  *
261  * If the client driver has outstanding descriptors and needs a hard stop on
262  * device activity it can call virtio_shutdown().  This routine will bring
263  * queue processing to an orderly stop and then reset the device, causing it to
264  * cease use of any DMA resources.  Once this function returns, the driver may
265  * call virtio_queue_evacuate() on each queue to retrieve any previously
266  * submitted chains.
267  *
268  * To tear down resources (e.g., interrupts and allocated memory) the client
269  * driver must finally call virtio_fini().  If virtio_shutdown() was not
270  * needed, this routine will also reset the device.
271  */
272 
273 #ifdef __cplusplus
274 extern "C" {
275 #endif
276 
/*
 * Opaque object types.  Only forward declarations appear in this header, so
 * client drivers hold pointers to these objects and operate on them through
 * the functions declared below.
 */
277 typedef struct virtio virtio_t;	/* framework instance */
278 typedef struct virtio_queue virtio_queue_t;	/* a virtqueue */
279 typedef struct virtio_chain virtio_chain_t;	/* a descriptor chain */
280 typedef struct virtio_dma virtio_dma_t;	/* a DMA allocation or binding */
281 
/*
 * Transfer direction of a single descriptor, as passed to
 * virtio_chain_append().
 */
282 typedef enum virtio_direction {
283 	/*
284 	 * In the base specification, a descriptor is either set up to be
285 	 * written by the device or to be read by the device, but not both.
286 	 */
287 	VIRTIO_DIR_DEVICE_WRITES = 1,	/* descriptor is written by the device */
288 	VIRTIO_DIR_DEVICE_READS		/* descriptor is read by the device */
289 } virtio_direction_t;
290 
/*
 * Framework lifecycle.
 *
 * virtio_init() is called early in attach(9E) and begins the initialisation
 * phase.  Its uint64_t argument is presumably the bitmask of desired features
 * described in the overview comment; the boolean_t argument is not described
 * in this header (TODO confirm both against the implementation).
 *
 * virtio_init_complete() ends the initialisation phase and sets the device
 * running.  Its int argument is a mask of allowed DDI_INTR_TYPE_* interrupt
 * types, or VIRTIO_ANY_INTR_TYPE.
 *
 * virtio_fini() tears down the instance.  The boolean_t presumably signals
 * failure, in which case failure is reported to the device instead of
 * resetting it (TODO confirm).
 *
 * virtio_quiesce() resets the device and returns a value suitable for
 * returning from quiesce(9E); it must only be called from quiesce context.
 *
 * virtio_shutdown() stops queue processing and resets the device, after
 * which virtio_queue_evacuate() may be used on each queue.
 */
291 void virtio_fini(virtio_t *, boolean_t);
292 virtio_t *virtio_init(dev_info_t *, uint64_t, boolean_t);
293 int virtio_init_complete(virtio_t *, int);
294 int virtio_quiesce(virtio_t *);
295 void virtio_shutdown(virtio_t *);
296 
/*
 * Register a handler for device configuration space change events.  This may
 * only be done during the initialisation phase.  The handler is called in
 * interrupt context with the provided user data argument first and a pointer
 * to the virtio_t object second.
 */
297 void virtio_register_cfgchange_handler(virtio_t *, ddi_intr_handler_t *,
298     void *);
299 
/*
 * Returns a value suitable for passing to mutex_init(9F) when initialising a
 * mutex used within an interrupt handler.  Call this after the initialisation
 * phase is complete.
 */
300 void *virtio_intr_pri(virtio_t *);
301 
/*
 * NOTE(review): this routine is not covered by the overview comment at the
 * top of this file.  Presumably it resets the device -- confirm against the
 * implementation before relying on it.
 */
302 void virtio_device_reset(virtio_t *);
303 
/*
 * Read a value of the indicated width from the device-specific region of the
 * device register space.  The uintptr_t argument is presumably an offset into
 * that region (TODO confirm).
 */
304 uint8_t virtio_dev_get8(virtio_t *, uintptr_t);
305 uint16_t virtio_dev_get16(virtio_t *, uintptr_t);
306 uint32_t virtio_dev_get32(virtio_t *, uintptr_t);
307 uint64_t virtio_dev_get64(virtio_t *, uintptr_t);
308 
/*
 * Write a value of the indicated width to the device-specific region of the
 * device register space.  The uintptr_t argument is presumably an offset into
 * that region (TODO confirm).  Note that, unlike the get family, no 64-bit
 * variant is declared here.
 */
309 void virtio_dev_put8(virtio_t *, uintptr_t, uint8_t);
310 void virtio_dev_put16(virtio_t *, uintptr_t, uint16_t);
311 void virtio_dev_put32(virtio_t *, uintptr_t, uint32_t);
312 
/*
 * Test whether an individual feature was negotiated.  This can be used once
 * the initialisation phase has begun (i.e., after virtio_init() returns).
 */
313 boolean_t virtio_feature_present(virtio_t *, uint64_t);
314 
/*
 * Configure a virtqueue.  This may only be done during the initialisation
 * phase.  Per the overview comment, the caller supplies the queue index (from
 * the specification for the device type), an informational name, an interrupt
 * handler and its user data argument, and a maximum segment (cookie) count.
 * NOTE(review): the purpose of the boolean_t argument is not described in
 * this header; confirm against the implementation.
 */
315 virtio_queue_t *virtio_queue_alloc(virtio_t *, uint16_t, const char *,
316     ddi_intr_handler_t *, void *, boolean_t, uint_t);
317 
/*
 * Virtqueue operation.
 *
 * virtio_queue_poll() retrieves a chain newly returned by the device.
 * virtio_queue_evacuate() retrieves previously submitted chains once
 * virtio_shutdown() has returned.  virtio_queue_flush() flushes outstanding
 * batched submissions to the device.  virtio_queue_no_interrupt() sets or
 * clears the NO_INTERRUPT flag, which is purely advisory.
 * virtio_queue_nactive() reports the number of chains submitted to the device
 * and not yet returned.  virtio_queue_size() reports the number of direct
 * descriptor entries, which is static over the lifetime of the queue.
 */
318 virtio_chain_t *virtio_queue_poll(virtio_queue_t *);
319 virtio_chain_t *virtio_queue_evacuate(virtio_queue_t *);
320 void virtio_queue_flush(virtio_queue_t *);
321 void virtio_queue_no_interrupt(virtio_queue_t *, boolean_t);
322 uint_t virtio_queue_nactive(virtio_queue_t *);
323 uint_t virtio_queue_size(virtio_queue_t *);
324 
/*
 * Descriptor chain construction.
 *
 * virtio_chain_alloc() allocates a chain against a queue and accepts a kmem
 * flag as per kmem_alloc(9F).  virtio_chain_append() adds a cookie (physical
 * address, length, and transfer direction) and may fail if the chain is
 * already using the maximum number of cookies for its queue.
 * virtio_chain_clear() resets a chain for reuse with new cookies, and
 * virtio_chain_free() frees it completely.
 */
325 virtio_chain_t *virtio_chain_alloc(virtio_queue_t *, int);
326 void virtio_chain_clear(virtio_chain_t *);
327 void virtio_chain_free(virtio_chain_t *);
328 int virtio_chain_append(virtio_chain_t *, uint64_t, size_t, virtio_direction_t);
329 
/*
 * Read back, or attach, the client driver specific pointer associated with a
 * chain.
 */
330 void *virtio_chain_data(virtio_chain_t *);
331 void virtio_chain_data_set(virtio_chain_t *, void *);
332 
/*
 * virtio_chain_submit() submits a fully constructed and synced chain to the
 * device.  The boolean_t presumably selects whether the queue is flushed to
 * the device on this submission, or left for a later virtio_queue_flush()
 * (TODO confirm).
 *
 * virtio_chain_received_length() reports the received data length for a
 * chain returned by the device; this value may not always be accurate or
 * useful.
 */
333 void virtio_chain_submit(virtio_chain_t *, boolean_t);
334 size_t virtio_chain_received_length(virtio_chain_t *);
335 
/*
 * Enable or disable notifications from the device.  Interrupt resources are
 * deallocated as part of a subsequent call to virtio_fini().
 */
336 int virtio_interrupts_enable(virtio_t *);
337 void virtio_interrupts_disable(virtio_t *);
338 
/*
 * DMA memory management.
 *
 * virtio_dma_alloc() accepts an allocation size, a DMA attribute template, a
 * set of DMA flags, and a kmem flag (KM_SLEEP or KM_NOSLEEP).
 * virtio_dma_alloc_nomem() allocates a handle without backing memory, and
 * virtio_dma_bind() then binds an arbitrary kernel memory location to it.
 * virtio_dma_unbind() undoes the binding so that the object can be reused.
 * virtio_dma_free() frees memory that is no longer required.
 * virtio_dma_sync() synchronises the entire binding and accepts the same
 * type values as ddi_dma_sync(9F).
 *
 * NOTE(review): the trailing int arguments of virtio_dma_alloc_nomem() and
 * virtio_dma_bind() are presumably DMA flags and/or a kmem flag -- confirm
 * against the implementation.
 */
339 virtio_dma_t *virtio_dma_alloc(virtio_t *, size_t, const ddi_dma_attr_t *, int,
340     int);
341 virtio_dma_t *virtio_dma_alloc_nomem(virtio_t *, const ddi_dma_attr_t *, int);
342 void virtio_dma_free(virtio_dma_t *);
343 int virtio_dma_bind(virtio_dma_t *, void *, size_t, int, int);
344 void virtio_dma_unbind(virtio_dma_t *);
345 void virtio_dma_sync(virtio_dma_t *, int);
346 
/*
 * Accessors for a mapping, with or without its own backing memory.
 *
 * virtio_dma_va() returns a kernel virtual address at a linear offset into
 * the buffer and performs bounds checking.  virtio_dma_size() returns the
 * total size of the mapping.  virtio_dma_ncookies() returns the number of
 * DMA cookies.  virtio_dma_cookie_pa() and virtio_dma_cookie_size() return
 * the physical address and length of a cookie, keyed on the zero-indexed
 * cookie number.
 */
347 void *virtio_dma_va(virtio_dma_t *, size_t);
348 size_t virtio_dma_size(virtio_dma_t *);
349 uint_t virtio_dma_ncookies(virtio_dma_t *);
350 uint64_t virtio_dma_cookie_pa(virtio_dma_t *, uint_t);
351 size_t virtio_dma_cookie_size(virtio_dma_t *, uint_t);
352 
353 /*
354  * virtio_init_complete() accepts a mask of allowed interrupt types using the
355  * DDI_INTR_TYPE_* family of constants.  If no specific interrupt type is
356  * required, pass VIRTIO_ANY_INTR_TYPE instead:
357  */
358 #define	VIRTIO_ANY_INTR_TYPE	0	/* empty mask: no type required */
359 
360 #ifdef __cplusplus
361 }
362 #endif
363 
364 #endif /* _VIRTIO_H */
365