/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * VIRTIO FRAMEWORK
 *
 * For design and usage documentation, see the comments in "virtio.h".
 */
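
/*
 * As a brief orientation, a client driver typically uses this framework in
 * the following order (an illustrative sketch only; "mydrv" and its feature
 * mask, handler, and segment count are hypothetical, and error handling is
 * elided; see "virtio.h" for the authoritative walkthrough):
 *
 *	virtio_t *vio = virtio_init(dip, MYDRV_WANTED_FEATURES, B_TRUE);
 *	virtio_queue_t *viq = virtio_queue_alloc(vio, 0, "requests",
 *	    mydrv_intr, mydrv, B_FALSE, MYDRV_MAX_SEGS);
 *	(void) virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE);
 *	(void) virtio_interrupts_enable(vio);
 */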

#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/avintr.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/sysmacros.h>
#include <sys/pci.h>

#include "virtio.h"
#include "virtio_impl.h"


/*
 * Linkage structures
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};

int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}


static void virtio_set_status(virtio_t *, uint8_t);
static void virtio_set_status_locked(virtio_t *, uint8_t);
static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
    uint16_t);
static int virtio_interrupts_setup(virtio_t *, int);
static void virtio_interrupts_teardown(virtio_t *);
static void virtio_interrupts_disable_locked(virtio_t *);
static void virtio_queue_free(virtio_queue_t *);
static void virtio_device_reset_locked(virtio_t *);

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};


/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};
/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.  Each
 * descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


uint8_t
virtio_get8(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
}

uint16_t
virtio_get16(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
}

uint32_t
virtio_get32(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
}

void
virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
}

void
virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
}

void
virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
}

void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		virtio_set_status_locked(vio, VIRTIO_STATUS_FAILED);
	} else {
		virtio_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BAR0.
		 */
		ddi_regs_map_free(&vio->vio_barh);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}

/*
 * Early device initialisation for legacy (pre-1.0 specification) virtio
 * devices.
 */
virtio_t *
virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
{
	int r;

	/*
	 * First, confirm that this is a legacy device.
	 */
	ddi_acc_handle_t pci;
	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "pci_config_setup failed");
		return (NULL);
	}

	uint8_t revid;
	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
		dev_err(dip, CE_WARN, "could not read config space");
		pci_config_teardown(&pci);
		return (NULL);
	}

	pci_config_teardown(&pci);

	/*
	 * The legacy specification requires that the device advertise as PCI
	 * Revision 0.
	 */
	if (revid != 0) {
		dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
		    "legacy virtio device", (uint_t)revid);
		return (NULL);
	}

	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
	vio->vio_dip = dip;

	/*
	 * Map PCI BAR0 for legacy device access.
	 */
	if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
	    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
	    &vio->vio_barh)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
		kmem_free(vio, sizeof (*vio));
		return (NULL);
	}
	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;

	/*
	 * We initialise the mutex without an interrupt priority to ease the
	 * implementation of some of the configuration space access routines.
	 * Drivers using the virtio framework MUST make a call to
	 * "virtio_init_complete()" prior to spawning other threads or enabling
	 * interrupt handlers, at which time we will destroy and reinitialise
	 * the mutex for use in our interrupt handlers.
	 */
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);

	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
	    offsetof(virtio_queue_t, viq_link));

	/*
	 * Legacy virtio devices require a few common steps before we can
	 * negotiate device features.
	 */
	virtio_device_reset(vio);
	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);

	/*
	 * Negotiate features with the device.  Record the original supported
	 * feature set for debugging purposes.
	 */
	vio->vio_features_device = virtio_get32(vio,
	    VIRTIO_LEGACY_FEATURES_DEVICE);
	if (allow_indirect) {
		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	}
	vio->vio_features = vio->vio_features_device & driver_features;
	virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);

	/*
	 * The device-specific configuration begins at an offset into the BAR
	 * that depends on whether we have enabled MSI-X interrupts or not.
	 * Start out with the offset for pre-MSI-X operation so that we can
	 * read device configuration space prior to configuring interrupts.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;

	return (vio);
}
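
/*
 * Note that once virtio_init() returns, the negotiated feature set is fixed.
 * A driver that depends on an optional feature can check for it with
 * virtio_feature_present() before continuing; e.g. (sketch, with a
 * hypothetical feature bit):
 *
 *	if ((vio = virtio_init(dip, MYDRV_WANTED_FEATURES, B_TRUE)) == NULL)
 *		return (DDI_FAILURE);
 *	if (!virtio_feature_present(vio, MYDRV_F_REQUIRED_FEATURE)) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 */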

/*
 * Some virtio devices can change their device configuration state at any
 * time.  This function may be called by the driver during the initialisation
 * phase - before calling virtio_init_complete() - in order to register a
 * handler function which will be called when the device configuration space
 * is updated.
 */
void
virtio_register_cfgchange_handler(virtio_t *vio, ddi_intr_handler_t *func,
    void *funcarg)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));
	VERIFY(!vio->vio_cfgchange_handler_added);

	mutex_enter(&vio->vio_mutex);
	vio->vio_cfgchange_handler = func;
	vio->vio_cfgchange_handlerarg = funcarg;
	mutex_exit(&vio->vio_mutex);
}

/*
 * This function must be called by the driver once it has completed early
 * setup calls.  The value of "allowed_interrupt_types" is a mask of interrupt
 * types (DDI_INTR_TYPE_MSIX, etc) that we'll try to use when installing
 * handlers, or the special value 0 to allow the system to use any available
 * type.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues) ||
	    vio->vio_cfgchange_handler != NULL) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * Now that we know the interrupt priority, we can initialise the
	 * mutexes properly.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}

boolean_t
virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
{
	return ((vio->vio_features & feature_mask) != 0);
}

void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}

/*
 * Enable a bit in the device status register.  Each bit signals a level of
 * guest readiness to the host.  Use the VIRTIO_STATUS_* constants for
 * "status".  To zero the status field use virtio_device_reset().
 */
static void
virtio_set_status_locked(virtio_t *vio, uint8_t status)
{
	VERIFY3U(status, !=, 0);
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);
}

static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	mutex_enter(&vio->vio_mutex);
	virtio_set_status_locked(vio, status);
	mutex_exit(&vio->vio_mutex);
}

static void
virtio_device_reset_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
}

void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}

/*
 * Some queues are effectively long-polled; the driver submits a series of
 * buffers and the device only returns them when there is data available.
 * During detach, we need to coordinate the return of these buffers.  Calling
 * "virtio_shutdown()" will reset the device, then allow the removal of all
 * buffers that were in flight at the time of shutdown via
 * "virtio_queue_evacuate()".
 */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown.  This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device.  This removes any queue configuration on the
	 * device side.
	 */
	virtio_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}

/*
 * Common implementation of quiesce(9E) for simple Virtio-based devices.
 */
int
virtio_quiesce(virtio_t *vio)
{
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it.  All queue configuration is
	 * discarded.  This is good enough for quiesce(9E).
	 */
	virtio_device_reset_locked(vio);

	return (DDI_SUCCESS);
}
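
/*
 * A simple client driver can thus often implement its quiesce(9E) entry
 * point directly in terms of this function; e.g. (sketch, assuming a
 * hypothetical per-instance soft state object holding the virtio_t pointer):
 *
 *	static int
 *	mydrv_quiesce(dev_info_t *dip)
 *	{
 *		mydrv_t *md;
 *
 *		if ((md = ddi_get_soft_state(mydrv_state,
 *		    ddi_get_instance(dip))) == NULL)
 *			return (DDI_FAILURE);
 *
 *		return (virtio_quiesce(md->md_virtio));
 *	}
 */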

/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Note that these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change.
 */

uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	/*
	 * On at least some systems, a 64-bit read or write to this BAR is not
	 * possible.  For legacy devices, there is no generation number to use
	 * to determine if configuration may have changed half-way through a
	 * read.  We need to continue to read both halves of the value until we
	 * read the same value at least twice.
	 */
	uintptr_t o_lo = vio->vio_config_offset + offset;
	uintptr_t o_hi = o_lo + 4;

	uint64_t val = virtio_get32(vio, o_lo) |
	    ((uint64_t)virtio_get32(vio, o_hi) << 32);

	for (;;) {
		uint64_t tval = virtio_get32(vio, o_lo) |
		    ((uint64_t)virtio_get32(vio, o_hi) << 32);

		if (tval == val) {
			break;
		}

		val = tval;
	}

	mutex_exit(&vio->vio_mutex);
	return (val);
}

void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put8(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put16(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put32(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}
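
/*
 * For example, a block device driver might read a 64-bit capacity field from
 * the start of the device-specific configuration region with:
 *
 *	uint64_t capacity = virtio_dev_get64(vio, 0);
 *
 * (The offset 0 here is purely illustrative; real offsets come from the
 * relevant device specification.)
 */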

/*
 * VIRTQUEUE MANAGEMENT
 */

static int
virtio_inflight_compar(const void *lp, const void *rp)
{
	const virtio_chain_t *l = lp;
	const virtio_chain_t *r = rp;

	if (l->vic_head < r->vic_head) {
		return (-1);
	} else if (l->vic_head > r->vic_head) {
		return (1);
	} else {
		return (0);
	}
}

virtio_queue_t *
virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
    ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
    uint_t max_segs)
{
	uint16_t qsz;
	char space_name[256];

	if (max_segs < 1) {
		/*
		 * Every descriptor, direct or indirect, needs to refer to at
		 * least one buffer.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "segment count must be at least 1", name, (uint_t)qidx);
		return (NULL);
	}

	mutex_enter(&vio->vio_mutex);

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
		/*
		 * Cannot configure any more queues once initial setup is
		 * complete and interrupts have been allocated.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "alloc after init complete", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	/*
	 * There is no way to negotiate a different queue size for legacy
	 * devices.  We must read and use the native queue size of the device.
	 */
	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
		/*
		 * A size of zero means the device does not have a queue with
		 * this index.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "does not exist on device", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	mutex_exit(&vio->vio_mutex);

	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
	viq->viq_virtio = vio;
	viq->viq_name = name;
	viq->viq_index = qidx;
	viq->viq_size = qsz;
	viq->viq_func = func;
	viq->viq_funcarg = funcarg;
	viq->viq_max_segs = max_segs;
	avl_create(&viq->viq_inflight, virtio_inflight_compar,
	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));

	/*
	 * Allocate the mutex without an interrupt priority for now, as we do
	 * with "vio_mutex".  We'll reinitialise it in
	 * "virtio_init_complete()".
	 */
	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);

	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
	    !force_direct) {
		/*
		 * If we were able to negotiate the indirect descriptor
		 * feature, and the caller has not explicitly forced the use of
		 * direct descriptors, we'll allocate indirect descriptor lists
		 * for each chain.
		 */
		viq->viq_indirect = B_TRUE;
	}

	/*
	 * Track descriptor usage in an identifier space.
	 */
	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
		    "ID space");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * For legacy devices, memory for the queue has a strict layout
	 * determined by the queue size.
	 */
	size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
	size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
	    sizeof (virtio_vq_driver_t) +
	    sizeof (uint16_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);
	size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
	    sizeof (virtio_vq_elem_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);

	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    KM_SLEEP) != DDI_SUCCESS) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
		    "DMA memory");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * NOTE: The viq_dma_* members below are used by
	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
	 * offsets into the DMA allocation for partial synchronisation.  If the
	 * ordering of, or relationship between, these pointers changes, the
	 * macros must be kept in sync.
	 */
	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);

	/*
	 * Install in the per-device list of queues.
	 */
	mutex_enter(&vio->vio_mutex);
	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
	    chkvq = list_next(&vio->vio_queues, chkvq)) {
		if (chkvq->viq_index == qidx) {
			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
			    "queue \"%s\" with same index (%d) as queue \"%s\"",
			    name, qidx, chkvq->viq_name);
			mutex_exit(&vio->vio_mutex);
			virtio_queue_free(viq);
			return (NULL);
		}
	}
	list_insert_tail(&vio->vio_queues, viq);

	/*
	 * Ensure the zeroing of the queue memory is visible to the host before
	 * we inform the device of the queue address.
	 */
	membar_producer();
	VIRTQ_DMA_SYNC_FORDEV(viq);

	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
	    virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);

	mutex_exit(&vio->vio_mutex);
	return (viq);
}
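
/*
 * As a worked example of the layout arithmetic above: for a queue size of
 * 256, and assuming the legacy ring element sizes (16-byte descriptors, a
 * 4-byte driver ring header with 2-byte entries, and a 4-byte device ring
 * header with 8-byte entries), we would have
 *
 *	sz_descs  = 16 * 256                            = 4096
 *	sz_driver = P2ROUNDUP(4096 + 4 + 2 * 256, 4096) = 8192
 *	sz_device = P2ROUNDUP(4 + 8 * 256, 4096)        = 4096
 *
 * for a single 12 KB physically contiguous allocation.
 */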

static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
		virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}

void
virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
{
	mutex_enter(&viq->viq_mutex);

	if (stop_interrupts) {
		viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
	} else {
		viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
	}
	VIRTQ_DMA_SYNC_FORDEV(viq);

	mutex_exit(&viq->viq_mutex);
}

static virtio_chain_t *
virtio_queue_complete(virtio_queue_t *viq, uint_t index)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	virtio_chain_t *vic;

	virtio_chain_t search;
	bzero(&search, sizeof (search));
	search.vic_head = index;

	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
		return (NULL);
	}
	avl_remove(&viq->viq_inflight, vic);

	return (vic);
}

uint_t
virtio_queue_size(virtio_queue_t *viq)
{
	return (viq->viq_size);
}

uint_t
virtio_queue_nactive(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	uint_t r = avl_numnodes(&viq->viq_inflight);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted.  Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor ring
	 * (vqde_ring[]) and any memory referenced by a descriptor occur after
	 * we have read the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
	uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}
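
/*
 * A per-queue interrupt handler typically drains the queue by polling until
 * no more chains are returned; e.g. (sketch; "mydrv_complete" is a
 * hypothetical completion routine, and "arg0" is the funcarg registered with
 * virtio_queue_alloc()):
 *
 *	static uint_t
 *	mydrv_intr(caddr_t arg0, caddr_t arg1)
 *	{
 *		mydrv_t *md = (mydrv_t *)arg0;
 *		virtio_chain_t *vic;
 *
 *		while ((vic = virtio_queue_poll(md->md_queue)) != NULL) {
 *			mydrv_complete(md, virtio_chain_data(vic));
 *		}
 *
 *		return (DDI_INTR_CLAIMED);
 *	}
 */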

/*
 * After a call to "virtio_shutdown()", the driver must retrieve any
 * previously submitted chains and free any associated resources.
 */
virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	mutex_enter(&vio->vio_mutex);
	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
		dev_err(vio->vio_dip, CE_PANIC,
		    "virtio_queue_evacuate() without virtio_shutdown()");
	}
	mutex_exit(&vio->vio_mutex);

	mutex_enter(&viq->viq_mutex);
	VERIFY(viq->viq_shutdown);

	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
	if (vic != NULL) {
		avl_remove(&viq->viq_inflight, vic);
	}

	mutex_exit(&viq->viq_mutex);

	return (vic);
}
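
/*
 * For example, the detach path of a driver might run (sketch):
 *
 *	virtio_shutdown(vio);
 *	while ((vic = virtio_queue_evacuate(viq)) != NULL) {
 *		(release any driver resources tied to the chain)
 *		virtio_chain_free(vic);
 *	}
 *	virtio_fini(vio, B_FALSE);
 */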

/*
 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
 */

/*
 * When the device returns a descriptor chain to the driver, it may provide
 * the length in bytes of data written into the chain.  Client drivers should
 * use this value with care; the specification suggests some device
 * implementations have not always provided a useful or correct value.
 */
size_t
virtio_chain_received_length(virtio_chain_t *vic)
{
	return (vic->vic_received_length);
}

/*
 * Allocate a descriptor chain for use with this queue.  The "kmflags" value
 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue.  We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}

void *
virtio_chain_data(virtio_chain_t *vic)
{
	return (vic->vic_data);
}

void
virtio_chain_data_set(virtio_chain_t *vic, void *data)
{
	vic->vic_data = data;
}

void
virtio_chain_clear(virtio_chain_t *vic)
{
	if (vic->vic_indirect_capacity != 0) {
		/*
		 * There should only be one direct descriptor, which points at
		 * our indirect descriptor list.  We don't want to clear it
		 * here.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);

		if (vic->vic_indirect_used > 0) {
			/*
			 * Clear out the indirect descriptor table.
			 */
			vic->vic_indirect_used = 0;
			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
			    virtio_dma_size(&vic->vic_indirect_dma));
		}

	} else if (vic->vic_direct_capacity > 0) {
		/*
		 * Release any descriptors that were assigned to us previously.
		 */
		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
			vic->vic_direct[i] = 0;
		}
		vic->vic_direct_used = 0;
	}
}

void
virtio_chain_free(virtio_chain_t *vic)
{
	/*
	 * First ensure that we have released any descriptors used by this
	 * chain.
	 */
	virtio_chain_clear(vic);

	if (vic->vic_indirect_capacity > 0) {
		/*
		 * Release the direct descriptor that points to our indirect
		 * descriptor list.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);
		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);

		virtio_dma_fini(&vic->vic_indirect_dma);
	}

	size_t vicsz = sizeof (*vic) +
	    vic->vic_direct_capacity * sizeof (uint16_t);

	kmem_free(vic, vicsz);
}

static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	VERIFY3S(index, <=, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}

static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[index - 1].vqd_next = index;
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry.  Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[p].vqd_next = index;
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	vqd[index].vqd_addr = pa;
	vqd[index].vqd_len = len;
	vqd[index].vqd_flags = flags;
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}

int
virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
    virtio_direction_t dir)
{
	virtio_queue_t *viq = vic->vic_vq;
	uint16_t flags = 0;

	switch (dir) {
	case VIRTIO_DIR_DEVICE_WRITES:
		flags |= VIRTQ_DESC_F_WRITE;
		break;

	case VIRTIO_DIR_DEVICE_READS:
		break;

	default:
		panic("unknown direction value %u", dir);
	}

	mutex_enter(&viq->viq_mutex);
	int r = virtio_chain_append_impl(vic, pa, len, flags);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
		virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
		    viq->viq_index);
	}
}

void
virtio_queue_flush(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	virtio_queue_flush_locked(viq);
	mutex_exit(&viq->viq_mutex);
}

void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue.  The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len =
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain.  The updated value of viq_driver_index is not yet visible to
	 * the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];

	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}
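
/*
 * Putting the chain routines together, submitting a request generally looks
 * like this (sketch; the physical addresses, lengths, and directions are
 * driver-specific, and error paths are elided):
 *
 *	virtio_chain_t *vic = virtio_chain_alloc(viq, KM_SLEEP);
 *	virtio_chain_data_set(vic, mydrv_request);
 *	(void) virtio_chain_append(vic, header_pa, header_len,
 *	    VIRTIO_DIR_DEVICE_READS);
 *	(void) virtio_chain_append(vic, buffer_pa, buffer_len,
 *	    VIRTIO_DIR_DEVICE_WRITES);
 *	virtio_chain_submit(vic, B_TRUE);
 *
 * A driver submitting several chains at once can instead pass B_FALSE and
 * make a single virtio_queue_flush() call at the end, notifying the device
 * only once.
 */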

/*
 * INTERRUPTS MANAGEMENT
 */

static const char *
virtio_interrupt_type_name(int type)
{
	switch (type) {
	case DDI_INTR_TYPE_MSIX:
		return ("MSI-X");
	case DDI_INTR_TYPE_MSI:
		return ("MSI");
	case DDI_INTR_TYPE_FIXED:
		return ("fixed");
	default:
		return ("?");
	}
}

static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s "
		    "interrupts", virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}

static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us.
	 * Reading this field resets it to zero.
	 */
	isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);

	if ((isr & VIRTIO_ISR_CHECK_QUEUES) != 0) {
		r = DDI_INTR_CLAIMED;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (viq->viq_func != NULL) {
				mutex_exit(&vio->vio_mutex);
				(void) viq->viq_func(viq->viq_funcarg, arg0);
				mutex_enter(&vio->vio_mutex);

				if (vio->vio_initlevel &
				    VIRTIO_INITLEVEL_SHUTDOWN) {
					/*
					 * The device was shut down while in a
					 * queue handler routine.
					 */
					break;
				}
			}
		}
	}

	mutex_exit(&vio->vio_mutex);

	/*
	 * vio_cfgchange_{handler,handlerarg} cannot change while interrupts
	 * are configured so it is safe to access them outside of the lock.
	 */

	if ((isr & VIRTIO_ISR_CHECK_CONFIG) != 0) {
		r = DDI_INTR_CLAIMED;
		if (vio->vio_cfgchange_handler != NULL) {
			(void) vio->vio_cfgchange_handler(
			    (caddr_t)vio->vio_cfgchange_handlerarg,
			    (caddr_t)vio);
		}
	}

	return (r);
}

static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	/*
	 * If there is a configuration change handler, one extra interrupt
	 * is needed for that.
	 */
	if (vio->vio_cfgchange_handler != NULL)
		count++;

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != VIRTIO_ANY_INTR_TYPE) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of
	 * preference.  Note that the specification does not appear to allow
	 * for the use of classical MSI, so we are limited to either MSI-X or
	 * fixed interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;

	/* Bind the configuration vector interrupt */
	if (vio->vio_cfgchange_handler != NULL) {
		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    vio->vio_cfgchange_handler,
		    (caddr_t)vio->vio_cfgchange_handlerarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "adding configuration change interrupt failed");
			goto fail;
		}
		vio->vio_cfgchange_handler_added = B_TRUE;
		vio->vio_cfgchange_handler_index = n;
		n++;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}

static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		/*
		 * Remove the configuration vector interrupt handler.
		 */
		if (vio->vio_cfgchange_handler_added) {
			int r;

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN,
				    "removing configuration change interrupt "
				    "handler failed (%d)", r);
			}
			vio->vio_cfgchange_handler_added = B_FALSE;
		}

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}

static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
			    viq->viq_index);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}

		if (vio->vio_cfgchange_handler_added) {
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_CONFIG,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
}

int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device.  While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR.
		 */
		vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program.  We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
			    msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}

		if (vio->vio_cfgchange_handler_added) {
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_CONFIG,
			    vio->vio_cfgchange_handler_index);

			/* Verify the value was accepted. */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_CONFIG) !=
			    vio->vio_cfgchange_handler_index) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector for "
				    "configuration");

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}

static void
virtio_interrupts_disable_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
		return;
	}

	virtio_interrupts_unwind(vio);

	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
}

void
virtio_interrupts_disable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_interrupts_disable_locked(vio);
	mutex_exit(&vio->vio_mutex);
}