1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2019 Joyent, Inc.
14 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
15 * Copyright 2025 Oxide Computer Company
16 * Copyright 2026 Hans Rosenfeld
17 */
18
19 /*
20 * VIRTIO FRAMEWORK
21 *
22 * For design and usage documentation, see the comments in "virtio.h".
23 */
24
25 #include <sys/conf.h>
26 #include <sys/kmem.h>
27 #include <sys/debug.h>
28 #include <sys/modctl.h>
29 #include <sys/autoconf.h>
30 #include <sys/ddi_impldefs.h>
31 #include <sys/ddi.h>
32 #include <sys/inttypes.h>
33 #include <sys/sunddi.h>
34 #include <sys/sunndi.h>
35 #include <sys/avintr.h>
36 #include <sys/spl.h>
37 #include <sys/promif.h>
38 #include <sys/list.h>
39 #include <sys/bootconf.h>
40 #include <sys/bootsvcs.h>
41 #include <sys/sysmacros.h>
42 #include <sys/pci.h>
43 #include <sys/pci_cap.h>
44 #include <sys/stdbit.h>
45
46 #include "virtio.h"
47 #include "virtio_impl.h"
48 #include "virtio_endian.h"
49
50
/*
 * Linkage structures: this module provides common routines only, so it is
 * registered with the system as a miscellaneous module.
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};
63
/*
 * Loadable module entry point; install the module linkage.
 */
int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}
69
/*
 * Loadable module exit point; remove the module linkage.
 */
int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}
75
/*
 * Loadable module information entry point.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}
81
82 static void virtio_unmap_cap(virtio_t *, virtio_pci_cap_t *);
83 static boolean_t virtio_map_cap(virtio_t *, virtio_pci_cap_t *);
84 static void virtio_discover_pci_caps(virtio_t *, ddi_acc_handle_t);
85 static void virtio_set_status(virtio_t *, uint8_t);
86 static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
87 uint16_t);
88 static int virtio_interrupts_setup(virtio_t *, int);
89 static void virtio_interrupts_teardown(virtio_t *);
90 static void virtio_interrupts_disable_locked(virtio_t *);
91 static void virtio_queue_free(virtio_queue_t *);
92 static int virtio_bar_to_rnumber(virtio_t *, uint8_t);
93
/*
 * Tuneable that forces use of the legacy interface even if the hypervisor
 * presents transitional devices.  It has no effect if only a modern device is
 * presented.  NOTE(review): presumably settable via /etc/system like other
 * kernel module tuneables — confirm.
 */
int virtio_force_legacy = 0;
100
/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.  DDI_NEVERSWAP_ACC is used because any required
 * endianness conversion is performed explicitly by this framework (see
 * "virtio_endian.h" and the viq_gtoh16() uses below) rather than by the
 * access handle.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};
111
112
/*
 * DMA attributes for the memory given to the device for queue management.
 * The queue is handed to the device as a single contiguous region (see the
 * single-cookie use of virtio_dma_cookie_pa() in virtio_queue_alloc()), so
 * dma_attr_sgllen is 1.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};
135
/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.  Each
 * descriptor is 16 bytes long, and the list must be allocated as one
 * contiguous region (dma_attr_sgllen is 1).
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};
156
157
/*
 * Tear down a virtio_t created with virtio_init(), releasing all framework
 * resources.  If "failed" is B_TRUE, attach did not succeed and we report
 * VIRTIO_STATUS_FAILED to the host instead of resetting the device.
 */
void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	/* Remove interrupt handlers before freeing the queues they use. */
	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);
	mutex_destroy(&vio->vio_qlock);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		vio->vio_ops->vop_set_status_locked(vio, VIRTIO_STATUS_FAILED);
	} else {
		/* Reset the device so it stops using our DMA memory. */
		vio->vio_ops->vop_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BARs.  vio_bar is only set for legacy devices;
		 * the capability regions are only mapped for modern ones
		 * (virtio_unmap_cap() skips entries that were never mapped).
		 */
		if (vio->vio_bar != NULL)
			ddi_regs_map_free(&vio->vio_barh);

		virtio_unmap_cap(vio, &vio->vio_cap_common);
		virtio_unmap_cap(vio, &vio->vio_cap_notify);
		virtio_unmap_cap(vio, &vio->vio_cap_isr);
		virtio_unmap_cap(vio, &vio->vio_cap_device);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}
213
214 /*
215 * Early device initialisation for virtio devices.
216 */
217 virtio_t *
virtio_init(dev_info_t * dip)218 virtio_init(dev_info_t *dip)
219 {
220 /*
221 * First, let's see what kind of device this is.
222 */
223 ddi_acc_handle_t pci;
224 if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
225 dev_err(dip, CE_WARN, "pci_config_setup failed");
226 return (NULL);
227 }
228
229 uint16_t devid;
230 if ((devid = pci_config_get16(pci, PCI_CONF_DEVID)) == PCI_EINVAL16) {
231 dev_err(dip, CE_WARN, "could not read config space devid");
232 pci_config_teardown(&pci);
233 return (NULL);
234 }
235
236 uint8_t revid;
237 if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
238 dev_err(dip, CE_WARN, "could not read config space revid");
239 pci_config_teardown(&pci);
240 return (NULL);
241 }
242
243 virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
244 vio->vio_dip = dip;
245
246 virtio_discover_pci_caps(vio, pci);
247 pci_config_teardown(&pci);
248
249 /*
250 * In order to operate over the modern interface we must have found a
251 * minimum set of capabiities.
252 */
253 boolean_t found_modern_caps =
254 (vio->vio_cap_common.vpc_type != 0 &&
255 vio->vio_cap_notify.vpc_type != 0 &&
256 vio->vio_cap_isr.vpc_type != 0 &&
257 vio->vio_cap_device.vpc_type != 0);
258
259 if (devid >= VIRTIO_MIN_MODERN_DEVID) {
260 /*
261 * This is a purely "modern" device. If we haven't found the
262 * required PCI capabilities then we can't proceed.
263 */
264 if (!found_modern_caps) {
265 dev_err(dip, CE_WARN,
266 "Did not find required PCI capabilities for a "
267 " modern VirtIO device");
268 kmem_free(vio, sizeof (*vio));
269 return (NULL);
270 }
271
272 /*
273 * There is nothing else that is mandatory for a modern device
274 * that we can check.
275 */
276 vio->vio_mode = VIRTIO_MODE_MODERN;
277 vio->vio_ops = &virtio_modern_ops;
278 } else {
279 /*
280 * This could be a pure "legacy" or a "transitional" device.
281 * In either case the specification requires that the device
282 * advertise as PCI Revision 0.
283 */
284 if (revid != 0) {
285 dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
286 "transitional or legacy virtio device",
287 (uint_t)revid);
288 kmem_free(vio, sizeof (*vio));
289 return (NULL);
290 }
291
292 /*
293 * If we found the modern PCI capabilities then we're
294 * transitional, otherwise we're legacy. We will always
295 * choose to use the modern interfaces on a transitional
296 * device. Ostensibly the VIRTIO_F_VERSION_1 flag is intended
297 * to help with this decision, but it is only visible through
298 * the modern interface!
299 */
300 if (found_modern_caps && virtio_force_legacy == 0) {
301 vio->vio_mode = VIRTIO_MODE_TRANSITIONAL;
302 vio->vio_ops = &virtio_modern_ops;
303 } else {
304 vio->vio_mode = VIRTIO_MODE_LEGACY;
305 vio->vio_ops = &virtio_legacy_ops;
306 }
307 }
308
309 if (vio->vio_mode == VIRTIO_MODE_LEGACY) {
310 int rnumber = virtio_bar_to_rnumber(vio, VIRTIO_LEGACY_BAR);
311
312 /*
313 * Map PCI BAR0 for legacy device access.
314 */
315 if (rnumber == -1 || ddi_regs_map_setup(dip, rnumber,
316 (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
317 &vio->vio_barh) != DDI_SUCCESS) {
318 dev_err(dip, CE_WARN, "Failed to map BAR0");
319 kmem_free(vio, sizeof (*vio));
320 return (NULL);
321 }
322 } else {
323 /*
324 * Map the BAR regions required for the modern interface.
325 */
326 if (!virtio_map_cap(vio, &vio->vio_cap_common) ||
327 !virtio_map_cap(vio, &vio->vio_cap_notify) ||
328 !virtio_map_cap(vio, &vio->vio_cap_isr) ||
329 !virtio_map_cap(vio, &vio->vio_cap_device)) {
330 kmem_free(vio, sizeof (*vio));
331 return (NULL);
332 }
333 }
334 vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;
335
336 /*
337 * We initialise the mutex without an interrupt priority to ease the
338 * implementation of some of the configuration space access routines.
339 * Drivers using the virtio framework MUST make a call to
340 * "virtio_init_complete()" prior to spawning other threads or enabling
341 * interrupt handlers, at which time we will destroy and reinitialise
342 * the mutex for use in our interrupt handlers.
343 */
344 mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);
345
346 list_create(&vio->vio_queues, sizeof (virtio_queue_t),
347 offsetof(virtio_queue_t, viq_link));
348 mutex_init(&vio->vio_qlock, NULL, MUTEX_DRIVER, NULL);
349 vio->vio_qcur = UINT16_MAX;
350
351 /*
352 * Virtio devices require a few common steps before we can negotiate
353 * device features.
354 */
355 virtio_device_reset(vio);
356 virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
357 virtio_set_status(vio, VIRTIO_STATUS_DRIVER);
358
359 vio->vio_features_device = vio->vio_ops->vop_device_get_features(vio);
360 vio->vio_features = vio->vio_features_device;
361
362 return (vio);
363 }
364
365 boolean_t
virtio_init_features(virtio_t * vio,uint64_t driver_features,boolean_t allow_indirect)366 virtio_init_features(virtio_t *vio, uint64_t driver_features,
367 boolean_t allow_indirect)
368 {
369 if (!virtio_modern(vio) && driver_features >> 32 != 0) {
370 dev_err(vio->vio_dip, CE_WARN,
371 "driver programming error; high bits set in features");
372 return (B_FALSE);
373 }
374
375 if (allow_indirect)
376 driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
377 if (virtio_modern(vio))
378 driver_features |= VIRTIO_F_VERSION_1;
379
380 vio->vio_features &= driver_features;
381
382 if (!vio->vio_ops->vop_device_set_features(vio, vio->vio_features)) {
383 dev_err(vio->vio_dip, CE_WARN, "feature negotiation failed");
384 return (B_FALSE);
385 }
386
387 /*
388 * With the legacy interface the device-specific configuration begins
389 * at an offset into the BAR that depends on whether we have enabled
390 * MSI-X interrupts or not. Start out with the offset for pre-MSI-X
391 * operation so that we can read device configuration space prior to
392 * configuring interrupts.
393 */
394 if (!virtio_modern(vio))
395 vio->vio_legacy_cfg_offset = VIRTIO_LEGACY_CFG_OFFSET;
396
397 return (B_TRUE);
398 }
399
400 /*
401 * Some virtio devices can change their device configuration state at any
402 * time. This function may be called by the driver during the initialisation
403 * phase - before calling virtio_init_complete() - in order to register a
404 * handler function which will be called when the device configuration space
405 * is updated.
406 */
407 void
virtio_register_cfgchange_handler(virtio_t * vio,ddi_intr_handler_t * func,void * funcarg)408 virtio_register_cfgchange_handler(virtio_t *vio, ddi_intr_handler_t *func,
409 void *funcarg)
410 {
411 VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));
412 VERIFY(!vio->vio_cfgchange_handler_added);
413
414 mutex_enter(&vio->vio_mutex);
415 vio->vio_cfgchange_handler = func;
416 vio->vio_cfgchange_handlerarg = funcarg;
417 mutex_exit(&vio->vio_mutex);
418 }
419
/*
 * This function must be called by the driver once it has completed early setup
 * calls. The value of "allowed_interrupt_types" is a mask of interrupt types
 * (DDI_INTR_TYPE_MSIX, etc) that we'll try to use when installing handlers, or
 * the special value 0 to allow the system to use any available type.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues) ||
	    vio->vio_cfgchange_handler != NULL) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * Now that interrupts are allocated we know their priority, so we can
	 * reinitialise the mutexes with it for safe use in our handlers.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	/*
	 * Enable the queues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		vio->vio_ops->vop_queue_enable_set(vio, viq->viq_index, true);
	}

	/* Tell the host we are ready to drive the device. */
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}
467
468 boolean_t
virtio_features_present(virtio_t * vio,uint64_t feature_mask)469 virtio_features_present(virtio_t *vio, uint64_t feature_mask)
470 {
471 return ((vio->vio_features & feature_mask) == feature_mask);
472 }
473
/*
 * Return the feature bits negotiated with the device.
 *
 * NOTE(review): the return type is uint32_t while the negotiated feature set
 * appears to be 64 bits wide (see virtio_init_features(), which shifts and
 * masks a uint64_t), so bits 32 and above - e.g. VIRTIO_F_VERSION_1 - are
 * truncated here.  Callers needing high bits should use
 * virtio_features_present().  Widening the return type would require a
 * matching change to the prototype in "virtio.h" - TODO confirm.
 */
uint32_t
virtio_features(virtio_t *vio)
{
	return (vio->vio_features);
}
479
480 boolean_t
virtio_modern(virtio_t * vio)481 virtio_modern(virtio_t *vio)
482 {
483 return (vio->vio_mode != VIRTIO_MODE_LEGACY);
484 }
485
/*
 * Select queue "qidx" on the device for subsequent queue register access.
 * Acquires vio_qlock, which remains held until virtio_releaseq() is called.
 * The current selection is cached in vio_qcur so that reselecting the same
 * queue avoids touching the device.
 */
void
virtio_acquireq(virtio_t *vio, uint16_t qidx)
{
	mutex_enter(&vio->vio_qlock);
	if (vio->vio_qcur != qidx) {
		vio->vio_ops->vop_queue_select(vio, qidx);
		vio->vio_qcur = qidx;
	}
}
495
/*
 * Release the queue selection lock taken by virtio_acquireq().
 */
void
virtio_releaseq(virtio_t *vio)
{
	mutex_exit(&vio->vio_qlock);
}
501
/*
 * Return the interrupt priority (as a DDI_INTR_PRI value) for initialising
 * driver mutexes.  Only valid after interrupt handlers have been added.
 */
void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}
509
510 static void
virtio_unmap_cap(virtio_t * vio,virtio_pci_cap_t * cap)511 virtio_unmap_cap(virtio_t *vio, virtio_pci_cap_t *cap)
512 {
513 if (cap->vpc_type != 0 && cap->vpc_bar != NULL)
514 ddi_regs_map_free(&cap->vpc_barh);
515 }
516
517 static boolean_t
virtio_map_cap(virtio_t * vio,virtio_pci_cap_t * cap)518 virtio_map_cap(virtio_t *vio, virtio_pci_cap_t *cap)
519 {
520 static uint8_t baridx = UINT8_MAX;
521 static int rnumber = -1;
522
523 VERIFY(cap->vpc_type);
524
525 /*
526 * With most hypervisors all of the capabilities point to the same BAR
527 * so we can cache and re-use the corresponding register number.
528 * This function is only called serially from `virtio_init` during
529 * driver attach so it is safe to use static locals.
530 */
531 if (baridx != cap->vpc_baridx) {
532 baridx = cap->vpc_baridx;
533 rnumber = virtio_bar_to_rnumber(vio, baridx);
534 }
535
536 if (rnumber == -1 || ddi_regs_map_setup(vio->vio_dip, rnumber,
537 (caddr_t *)&cap->vpc_bar, cap->vpc_offset, cap->vpc_size,
538 &virtio_acc_attr, &cap->vpc_barh) != DDI_SUCCESS) {
539 dev_err(vio->vio_dip, CE_WARN,
540 "Failed to map CAP %u @ "
541 "BAR%u 0x%" PRIx64 "+%" PRIx64,
542 cap->vpc_type, cap->vpc_baridx,
543 cap->vpc_offset, cap->vpc_size);
544 return (B_FALSE);
545 }
546
547 return (B_TRUE);
548 }
549
550 /*
551 * Devices which are capable of operating via the "modern" VirtIO interface,
552 * which includes "transitional" devices, present a number of PCI capabilities
553 * of the vendor-specific type.
554 */
555 static void
virtio_discover_pci_caps(virtio_t * vio,ddi_acc_handle_t pci)556 virtio_discover_pci_caps(virtio_t *vio, ddi_acc_handle_t pci)
557 {
558 uint16_t idx;
559
560 for (idx = 0; ; idx++) {
561 virtio_pci_cap_t *cap;
562 uint16_t base;
563 uint32_t id;
564
565 if (pci_cap_probe(pci, idx, &id, &base) != DDI_SUCCESS)
566 break;
567
568 /* The VirtIO caps are all of the "vendor-specific" type */
569 if (id != PCI_CAP_ID_VS)
570 continue;
571
572 uint8_t type = pci_cap_get(pci, PCI_CAP_CFGSZ_8, idx, base,
573 VIRTIO_PCI_CAP_TYPE);
574
575 uint8_t min_len = VIRTIO_PCI_CAP_BARLEN + sizeof (uint32_t);
576
577 /* We are currently only interested in the following types */
578 switch (type) {
579 case VPC_COMMON_CFG:
580 cap = &vio->vio_cap_common;
581 break;
582 case VPC_NOTIFY_CFG:
583 cap = &vio->vio_cap_notify;
584 /* The notify capability has an extra field */
585 min_len += sizeof (uint32_t);
586 break;
587 case VPC_ISR_CFG:
588 cap = &vio->vio_cap_isr;
589 break;
590 case VPC_DEVICE_CFG:
591 cap = &vio->vio_cap_device;
592 break;
593 default:
594 /* Not interested in this cap */
595 continue;
596 }
597
598 uint8_t caplen = pci_cap_get(pci, PCI_CAP_CFGSZ_8, idx, base,
599 VIRTIO_PCI_CAP_LEN);
600
601 /* Skip short capabilities */
602 if (caplen == PCI_EINVAL8 || caplen < min_len)
603 continue;
604
605 /*
606 * Devices can provide multiple versions of the same capability
607 * type which should be in order of preference. We skip
608 * duplicates and use the first instance of each type we find.
609 */
610 if (cap->vpc_type != 0)
611 continue;
612
613 cap->vpc_baridx = pci_cap_get(pci, PCI_CAP_CFGSZ_8, idx, base,
614 VIRTIO_PCI_CAP_BAR);
615 if (cap->vpc_type == PCI_EINVAL8)
616 continue;
617 cap->vpc_offset = pci_cap_get(pci, PCI_CAP_CFGSZ_32, idx, base,
618 VIRTIO_PCI_CAP_BAROFF);
619 if (cap->vpc_offset == PCI_EINVAL32)
620 continue;
621 cap->vpc_size = pci_cap_get(pci, PCI_CAP_CFGSZ_32, idx, base,
622 VIRTIO_PCI_CAP_BARLEN);
623 if (cap->vpc_size == PCI_EINVAL32)
624 continue;
625
626 /*
627 * The NOTIFY_CFG capability has an additional field which is
628 * the multiplier to use to find the correct offset in the BAR
629 * for each queue. It is permissable for this to be 0, in which
630 * case notifications for all queues are written to the start
631 * of the region.
632 */
633 if (type == VPC_NOTIFY_CFG) {
634 vio->vio_multiplier = pci_cap_get(pci, PCI_CAP_CFGSZ_32,
635 idx, base, VIRTIO_PCI_CAP_MULTIPLIER);
636 if (vio->vio_multiplier == PCI_EINVAL32)
637 continue;
638 }
639
640 /* Assigning the type marks this entry as valid */
641 cap->vpc_type = type;
642 }
643 }
644
/*
 * Enable a bit in the device status register.  Each bit signals a level of
 * guest readiness to the host.  Use the VIRTIO_STATUS_* constants for
 * "status" (e.g. VIRTIO_STATUS_ACKNOWLEDGE, VIRTIO_STATUS_DRIVER, as used in
 * virtio_init() and virtio_init_complete()).  To zero the status field use
 * virtio_device_reset().
 */
static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	mutex_enter(&vio->vio_mutex);
	vio->vio_ops->vop_set_status_locked(vio, status);
	mutex_exit(&vio->vio_mutex);
}
657
/*
 * Reset the device, zeroing the status field and discarding all queue
 * configuration on the device side.
 */
void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	vio->vio_ops->vop_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}
665
666 /*
667 * Some queues are effectively long-polled; the driver submits a series of
668 * buffers and the device only returns them when there is data available.
669 * During detach, we need to coordinate the return of these buffers. Calling
670 * "virtio_shutdown()" will reset the device, then allow the removal of all
671 * buffers that were in flight at the time of shutdown via
672 * "virtio_queue_evacuate()".
673 */
674 void
virtio_shutdown(virtio_t * vio)675 virtio_shutdown(virtio_t *vio)
676 {
677 mutex_enter(&vio->vio_mutex);
678 if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
679 /*
680 * Shutdown has been performed already.
681 */
682 mutex_exit(&vio->vio_mutex);
683 return;
684 }
685
686 /*
687 * First, mark all of the queues as shutdown. This will prevent any
688 * further activity.
689 */
690 for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
691 viq = list_next(&vio->vio_queues, viq)) {
692 mutex_enter(&viq->viq_mutex);
693 viq->viq_shutdown = B_TRUE;
694 mutex_exit(&viq->viq_mutex);
695 }
696
697 /*
698 * Now, reset the device. This removes any queue configuration on the
699 * device side.
700 */
701 vio->vio_ops->vop_device_reset_locked(vio);
702 vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
703 mutex_exit(&vio->vio_mutex);
704 }
705
706 /*
707 * Common implementation of quiesce(9E) for simple Virtio-based devices.
708 */
709 int
virtio_quiesce(virtio_t * vio)710 virtio_quiesce(virtio_t *vio)
711 {
712 if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
713 /*
714 * Device has already been reset.
715 */
716 return (DDI_SUCCESS);
717 }
718
719 /*
720 * When we reset the device, it should immediately stop using any DMA
721 * memory we've previously passed to it. All queue configuration is
722 * discarded. This is good enough for quiesce(9E).
723 */
724 vio->vio_ops->vop_device_reset_locked(vio);
725
726 return (DDI_SUCCESS);
727 }
728
/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Thin wrappers around the backend (legacy or modern) accessors for the
 * device-specific configuration region.  NOTE(review): the original comment
 * here said "these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change";
 * no lock is taken in these wrappers themselves, so the locking presumably
 * happens inside the vop_device_cfg_* implementations — confirm in the
 * backend code.
 */

/* Return the device configuration generation counter. */
uint8_t
virtio_dev_getgen(virtio_t *vio)
{
	return (vio->vio_ops->vop_device_cfg_gen(vio));
}

/* Read an 8-bit value from device-specific configuration at "offset". */
uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get8(vio, offset));
}

/* Read a 16-bit value from device-specific configuration at "offset". */
uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get16(vio, offset));
}

/* Read a 32-bit value from device-specific configuration at "offset". */
uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get32(vio, offset));
}

/* Read a 64-bit value from device-specific configuration at "offset". */
uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get64(vio, offset));
}

/* Write an 8-bit value to device-specific configuration at "offset". */
void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	vio->vio_ops->vop_device_cfg_put8(vio, offset, value);
}

/* Write a 16-bit value to device-specific configuration at "offset". */
void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	vio->vio_ops->vop_device_cfg_put16(vio, offset, value);
}

/* Write a 32-bit value to device-specific configuration at "offset". */
void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	vio->vio_ops->vop_device_cfg_put32(vio, offset, value);
}
783
784 /*
785 * VIRTQUEUE MANAGEMENT
786 */
787
788 static int
virtio_inflight_compar(const void * lp,const void * rp)789 virtio_inflight_compar(const void *lp, const void *rp)
790 {
791 const virtio_chain_t *l = lp;
792 const virtio_chain_t *r = rp;
793
794 if (l->vic_head < r->vic_head) {
795 return (-1);
796 } else if (l->vic_head > r->vic_head) {
797 return (1);
798 } else {
799 return (0);
800 }
801 }
802
803 virtio_queue_t *
virtio_queue_alloc(virtio_t * vio,uint16_t qidx,const char * name,ddi_intr_handler_t * func,void * funcarg,boolean_t force_direct,uint_t max_segs)804 virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
805 ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
806 uint_t max_segs)
807 {
808 char space_name[256];
809 uint64_t noff = 0;
810 uint16_t qsz;
811
812 if (max_segs < 1) {
813 /*
814 * Every descriptor, direct or indirect, needs to refer to at
815 * least one buffer.
816 */
817 dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
818 "segment count must be at least 1", name, (uint_t)qidx);
819 return (NULL);
820 }
821
822 mutex_enter(&vio->vio_mutex);
823
824 if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
825 /*
826 * Cannot configure any more queues once initial setup is
827 * complete and interrupts have been allocated.
828 */
829 dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
830 "alloc after init complete", name, (uint_t)qidx);
831 mutex_exit(&vio->vio_mutex);
832 return (NULL);
833 }
834
835 qsz = vio->vio_ops->vop_queue_size_get(vio, qidx);
836 if (qsz == 0) {
837 /*
838 * A size of zero means the device does not have a queue with
839 * this index.
840 */
841 dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
842 "does not exist on device", name, (uint_t)qidx);
843 mutex_exit(&vio->vio_mutex);
844 return (NULL);
845 }
846 /*
847 * There is no way to negotiate a different queue size for legacy
848 * devices. We must read and use the native queue size of the device.
849 * For devices using the modern interface we could choose to reduce
850 * the queue size; for now we write back the value advertised by the
851 * device unchanged.
852 */
853 if (vio->vio_ops->vop_queue_size_set != NULL)
854 vio->vio_ops->vop_queue_size_set(vio, qidx, qsz);
855
856 if (virtio_modern(vio)) {
857 noff = vio->vio_ops->vop_queue_noff_get(vio, qidx);
858 if (noff > vio->vio_cap_notify.vpc_size - sizeof (uint32_t)) {
859 dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
860 "invalid notification offset 0x%" PRIx64 " "
861 "for notify region of size 0x%" PRIx64,
862 name, (uint_t)qidx,
863 noff, vio->vio_cap_notify.vpc_size);
864 return (NULL);
865 }
866 }
867
868 mutex_exit(&vio->vio_mutex);
869
870 virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
871 viq->viq_virtio = vio;
872 viq->viq_name = name;
873 viq->viq_index = qidx;
874 viq->viq_size = qsz;
875 viq->viq_noff = noff;
876 viq->viq_func = func;
877 viq->viq_funcarg = funcarg;
878 viq->viq_max_segs = max_segs;
879 avl_create(&viq->viq_inflight, virtio_inflight_compar,
880 sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));
881
882 /*
883 * Allocate the mutex without an interrupt priority for now, as we do
884 * with "vio_mutex". We'll reinitialise it in
885 * "virtio_init_complete()".
886 */
887 mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);
888
889 if (virtio_features_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
890 !force_direct) {
891 /*
892 * If we were able to negotiate the indirect descriptor
893 * feature, and the caller has not explicitly forced the use of
894 * direct descriptors, we'll allocate indirect descriptor lists
895 * for each chain.
896 */
897 viq->viq_indirect = B_TRUE;
898 }
899
900 /*
901 * Track descriptor usage in an identifier space.
902 */
903 (void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
904 ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
905 if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
906 dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
907 "ID space");
908 virtio_queue_free(viq);
909 return (NULL);
910 }
911
912 /*
913 * For legacy devices, memory for the queue has a strict layout
914 * determined by the queue size, and with the device region
915 * starting on a fresh page. Modern and transitional devices have less
916 * stringent alignment requirements and virtqueues are more compact as
917 * a result.
918 */
919 const uint_t align = virtio_modern(vio) ? MODERN_VQ_ALIGN :
920 VIRTIO_PAGE_SIZE;
921
922 const size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
923 const size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
924 sizeof (virtio_vq_driver_t) +
925 sizeof (uint16_t) * qsz,
926 align, size_t);
927 const size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
928 sizeof (virtio_vq_elem_t) * qsz,
929 align, size_t);
930
931 if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
932 &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
933 KM_SLEEP) != DDI_SUCCESS) {
934 dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
935 "DMA memory");
936 virtio_queue_free(viq);
937 return (NULL);
938 }
939
940 /*
941 * NOTE: The viq_dma_* members below are used by
942 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
943 * offsets into the DMA allocation for partial synchronisation. If the
944 * ordering of, or relationship between, these pointers changes, the
945 * macros must be kept in sync.
946 */
947 viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
948 viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
949 viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);
950
951 /*
952 * Install in the per-device list of queues.
953 */
954 mutex_enter(&vio->vio_mutex);
955 for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
956 chkvq = list_next(&vio->vio_queues, chkvq)) {
957 if (chkvq->viq_index == qidx) {
958 dev_err(vio->vio_dip, CE_WARN, "attempt to register "
959 "queue \"%s\" with same index (%d) as queue \"%s\"",
960 name, qidx, chkvq->viq_name);
961 mutex_exit(&vio->vio_mutex);
962 virtio_queue_free(viq);
963 return (NULL);
964 }
965 }
966 list_insert_tail(&vio->vio_queues, viq);
967
968 /*
969 * Ensure the zeroing of the queue memory is visible to the host before
970 * we inform the device of the queue address.
971 */
972 membar_producer();
973 VIRTQ_DMA_SYNC_FORDEV(viq);
974
975 const uint64_t pa = virtio_dma_cookie_pa(&viq->viq_dma, 0);
976 vio->vio_ops->vop_queue_addr_set(vio, qidx,
977 pa, pa + sz_descs, pa + sz_driver);
978
979 mutex_exit(&vio->vio_mutex);
980 return (viq);
981 }
982
/*
 * Free a queue allocated with virtio_queue_alloc(), detaching it from the
 * device (unless a shutdown reset already did so) and releasing its DMA
 * memory and descriptor identifier space.  Interrupt handlers must already
 * have been removed, as the queue mutex is destroyed here.
 */
static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		vio->vio_ops->vop_queue_enable_set(vio, viq->viq_index, false);
		vio->vio_ops->vop_queue_addr_set(vio, viq->viq_index, 0, 0, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	/* All submitted chains must have been returned by now. */
	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	/* The descmap may not exist if allocation failed part-way through. */
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}
1018
1019 void
virtio_queue_no_interrupt(virtio_queue_t * viq,boolean_t stop_interrupts)1020 virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
1021 {
1022 mutex_enter(&viq->viq_mutex);
1023
1024 if (stop_interrupts) {
1025 viq->viq_dma_driver->vqdr_flags |=
1026 viq_gtoh16(viq, VIRTQ_AVAIL_F_NO_INTERRUPT);
1027 } else {
1028 viq->viq_dma_driver->vqdr_flags &=
1029 viq_gtoh16(viq, ~VIRTQ_AVAIL_F_NO_INTERRUPT);
1030 }
1031 VIRTQ_DMA_SYNC_FORDEV(viq);
1032
1033 mutex_exit(&viq->viq_mutex);
1034 }
1035
1036 static virtio_chain_t *
virtio_queue_complete(virtio_queue_t * viq,uint_t index)1037 virtio_queue_complete(virtio_queue_t *viq, uint_t index)
1038 {
1039 VERIFY(MUTEX_HELD(&viq->viq_mutex));
1040
1041 virtio_chain_t *vic;
1042
1043 virtio_chain_t search;
1044 bzero(&search, sizeof (search));
1045 search.vic_head = index;
1046
1047 if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
1048 return (NULL);
1049 }
1050 avl_remove(&viq->viq_inflight, vic);
1051
1052 return (vic);
1053 }
1054
1055 uint_t
virtio_queue_size(virtio_queue_t * viq)1056 virtio_queue_size(virtio_queue_t *viq)
1057 {
1058 return (viq->viq_size);
1059 }
1060
1061 uint_t
virtio_queue_nactive(virtio_queue_t * viq)1062 virtio_queue_nactive(virtio_queue_t *viq)
1063 {
1064 mutex_enter(&viq->viq_mutex);
1065 uint_t r = avl_numnodes(&viq->viq_inflight);
1066 mutex_exit(&viq->viq_mutex);
1067
1068 return (r);
1069 }
1070
/*
 * Poll the queue for a completed descriptor chain.  Returns the next chain
 * the device has placed in the used ring, or NULL if there is nothing new or
 * the device has been shut down.  Ownership of the returned chain passes to
 * the caller.
 */
virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted. Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	uint16_t dindex = viq_htog16(viq, viq->viq_dma_device->vqde_index);
	if (viq->viq_device_index == dindex) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and any referenced memory by the descriptor occur after we have read
	 * the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	/*
	 * Consume the next used ring entry.  The ring is circular, so the
	 * free-running device index is reduced modulo the queue size.
	 */
	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq_htog16(viq,
	    viq->viq_dma_device->vqde_ring[index].vqe_start);
	uint32_t len = viq_htog32(viq,
	    viq->viq_dma_device->vqde_ring[index].vqe_len);

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	/*
	 * Record the byte count the device reports having written; see
	 * virtio_chain_received_length() for the caveats.
	 */
	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}
1126
1127 /*
1128 * After a call to "virtio_shutdown()", the driver must retrieve any previously
1129 * submitted chains and free any associated resources.
1130 */
1131 virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t * viq)1132 virtio_queue_evacuate(virtio_queue_t *viq)
1133 {
1134 virtio_t *vio = viq->viq_virtio;
1135
1136 mutex_enter(&vio->vio_mutex);
1137 if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
1138 dev_err(vio->vio_dip, CE_PANIC,
1139 "virtio_queue_evacuate() without virtio_shutdown()");
1140 }
1141 mutex_exit(&vio->vio_mutex);
1142
1143 mutex_enter(&viq->viq_mutex);
1144 VERIFY(viq->viq_shutdown);
1145
1146 virtio_chain_t *vic = avl_first(&viq->viq_inflight);
1147 if (vic != NULL) {
1148 avl_remove(&viq->viq_inflight, vic);
1149 }
1150
1151 mutex_exit(&viq->viq_mutex);
1152
1153 return (vic);
1154 }
1155
1156 /*
1157 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
1158 */
1159
1160 /*
1161 * When the device returns a descriptor chain to the driver, it may provide the
1162 * length in bytes of data written into the chain. Client drivers should use
1163 * this value with care; the specification suggests some device implementations
1164 * have not always provided a useful or correct value.
1165 */
1166 size_t
virtio_chain_received_length(virtio_chain_t * vic)1167 virtio_chain_received_length(virtio_chain_t *vic)
1168 {
1169 return (vic->vic_received_length);
1170 }
1171
1172 /*
1173 * Allocate a descriptor chain for use with this queue. The "kmflags" value
1174 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
1175 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue. We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	/*
	 * A single unwind path serves both failure points above; this
	 * presumably relies on virtio_dma_fini() tolerating the zeroed,
	 * never-initialised DMA tracking structure from the first failure
	 * case -- confirm against the virtio DMA implementation.
	 */
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}
1254
1255 void *
virtio_chain_data(virtio_chain_t * vic)1256 virtio_chain_data(virtio_chain_t *vic)
1257 {
1258 return (vic->vic_data);
1259 }
1260
1261 void
virtio_chain_data_set(virtio_chain_t * vic,void * data)1262 virtio_chain_data_set(virtio_chain_t *vic, void *data)
1263 {
1264 vic->vic_data = data;
1265 }
1266
1267 void
virtio_chain_clear(virtio_chain_t * vic)1268 virtio_chain_clear(virtio_chain_t *vic)
1269 {
1270 if (vic->vic_indirect_capacity != 0) {
1271 /*
1272 * There should only be one direct descriptor, which points at
1273 * our indirect descriptor list. We don't want to clear it
1274 * here.
1275 */
1276 VERIFY3U(vic->vic_direct_capacity, ==, 1);
1277
1278 if (vic->vic_indirect_used > 0) {
1279 /*
1280 * Clear out the indirect descriptor table.
1281 */
1282 vic->vic_indirect_used = 0;
1283 bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
1284 virtio_dma_size(&vic->vic_indirect_dma));
1285 }
1286
1287 } else if (vic->vic_direct_capacity > 0) {
1288 /*
1289 * Release any descriptors that were assigned to us previously.
1290 */
1291 for (uint_t i = 0; i < vic->vic_direct_used; i++) {
1292 id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
1293 vic->vic_direct[i] = 0;
1294 }
1295 vic->vic_direct_used = 0;
1296 }
1297 }
1298
1299 void
virtio_chain_free(virtio_chain_t * vic)1300 virtio_chain_free(virtio_chain_t *vic)
1301 {
1302 /*
1303 * First ensure that we have released any descriptors used by this
1304 * chain.
1305 */
1306 virtio_chain_clear(vic);
1307
1308 if (vic->vic_indirect_capacity > 0) {
1309 /*
1310 * Release the direct descriptor that points to our indirect
1311 * descriptor list.
1312 */
1313 VERIFY3U(vic->vic_direct_capacity, ==, 1);
1314 id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);
1315
1316 virtio_dma_fini(&vic->vic_indirect_dma);
1317 }
1318
1319 size_t vicsz = sizeof (*vic) +
1320 vic->vic_direct_capacity * sizeof (uint16_t);
1321
1322 kmem_free(vic, vicsz);
1323 }
1324
1325 static inline int
virtio_queue_descmap_alloc(virtio_queue_t * viq,uint_t * indexp)1326 virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
1327 {
1328 id_t index;
1329
1330 if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
1331 return (ENOMEM);
1332 }
1333
1334 VERIFY3S(index, >=, 0);
1335 VERIFY3S(index, <=, viq->viq_size);
1336
1337 *indexp = (uint_t)index;
1338 return (0);
1339 }
1340
/*
 * Append a buffer (physical address "pa", byte length "len", descriptor
 * flags "flags") to a chain, using either the chain's private indirect
 * descriptor table or a direct descriptor from the queue-wide table.
 * Returns DDI_SUCCESS, or DDI_FAILURE when the chain is at capacity or no
 * direct descriptor could be allocated.
 */
static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.  Indirect entries are used sequentially,
			 * so the new entry's index doubles as its link value.
			 */
			vqd[index - 1].vqd_flags |=
			    viq_gtoh16(viq, VIRTQ_DESC_F_NEXT);
			vqd[index - 1].vqd_next = viq_gtoh16(viq, index);
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry. Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |=
			    viq_gtoh16(viq, VIRTQ_DESC_F_NEXT);
			vqd[p].vqd_next = viq_gtoh16(viq, index);
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	/*
	 * Fill in the new descriptor.  It is written without the NEXT flag;
	 * if another entry is appended later, the code above will link this
	 * descriptor to it at that point.
	 */
	vqd[index].vqd_addr = viq_gtoh64(viq, pa);
	vqd[index].vqd_len = viq_gtoh32(viq, len);
	vqd[index].vqd_flags = viq_gtoh16(viq, flags);
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}
1410
1411 int
virtio_chain_append(virtio_chain_t * vic,uint64_t pa,size_t len,virtio_direction_t dir)1412 virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
1413 virtio_direction_t dir)
1414 {
1415 virtio_queue_t *viq = vic->vic_vq;
1416 uint16_t flags = 0;
1417
1418 switch (dir) {
1419 case VIRTIO_DIR_DEVICE_WRITES:
1420 flags |= VIRTQ_DESC_F_WRITE;
1421 break;
1422
1423 case VIRTIO_DIR_DEVICE_READS:
1424 break;
1425
1426 default:
1427 panic("unknown direction value %u", dir);
1428 }
1429
1430 mutex_enter(&viq->viq_mutex);
1431 int r = virtio_chain_append_impl(vic, pa, len, flags);
1432 mutex_exit(&viq->viq_mutex);
1433
1434 return (r);
1435 }
1436
/*
 * Publish any descriptors placed in the driver ring to the device and, if
 * the device has not suppressed notifications, notify it.  The queue mutex
 * must be held by the caller.
 */
static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index =
	    viq_gtoh16(viq, viq->viq_driver_index);
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags &
	    viq_gtoh16(viq, VIRTQ_USED_F_NO_NOTIFY))) {
		vio->vio_ops->vop_queue_notify(viq);
	}
}
1464
1465 void
virtio_queue_flush(virtio_queue_t * viq)1466 virtio_queue_flush(virtio_queue_t *viq)
1467 {
1468 mutex_enter(&viq->viq_mutex);
1469 virtio_queue_flush_locked(viq);
1470 mutex_exit(&viq->viq_mutex);
1471 }
1472
/*
 * Hand a populated chain to the device by placing its head descriptor in the
 * next driver ring slot and adding the chain to the in-flight tree.  If
 * "flush" is true the ring update is made visible to the device immediately;
 * otherwise the caller batches several submissions and ends with
 * virtio_queue_flush().
 */
void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue. The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len = viq_gtoh32(viq,
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used);

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain. The updated value of viq_driver_index is not yet visible to
	 * the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] =
	    viq_gtoh16(viq, vic->vic_direct[0]);

	/*
	 * Record the head descriptor index; it is the key by which the
	 * completion path finds this chain in the in-flight tree.
	 */
	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}
1514
1515 /*
1516 * INTERRUPTS MANAGEMENT
1517 */
1518
1519 static const char *
virtio_interrupt_type_name(int type)1520 virtio_interrupt_type_name(int type)
1521 {
1522 switch (type) {
1523 case DDI_INTR_TYPE_MSIX:
1524 return ("MSI-X");
1525 case DDI_INTR_TYPE_MSI:
1526 return ("MSI");
1527 case DDI_INTR_TYPE_FIXED:
1528 return ("fixed");
1529 default:
1530 return ("?");
1531 }
1532 }
1533
/*
 * Allocate exactly "nrequired" interrupts of the given DDI type for this
 * device.  On success the handles are stored in vio_interrupts, the
 * INT_ALLOC init level is set, and DDI_SUCCESS is returned; on any failure
 * all partial state is released and DDI_FAILURE is returned.  The device
 * mutex must be held.
 */
static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	/*
	 * ALLOC_STRICT means the system must give us all "nrequired" vectors
	 * or fail the whole allocation; there is no partial success to
	 * unwind beyond the handle array itself.
	 */
	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}
1586
/*
 * Shared (fixed) interrupt handler: multiplexes a single interrupt line into
 * the per-queue handlers and the configuration change handler, using the ISR
 * status register to decide which apply.  Returns DDI_INTR_CLAIMED if the
 * interrupt was ours.
 */
static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us. Reading
	 * this field resets it to zero.
	 */
	isr = vio->vio_ops->vop_isr_status(vio);

	if ((isr & VIRTIO_ISR_CHECK_QUEUES) != 0) {
		r = DDI_INTR_CLAIMED;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (viq->viq_func != NULL) {
				/*
				 * Drop the device mutex around the driver's
				 * handler so that it may perform queue
				 * operations of its own.
				 */
				mutex_exit(&vio->vio_mutex);
				(void) viq->viq_func(viq->viq_funcarg, arg0);
				mutex_enter(&vio->vio_mutex);

				if (vio->vio_initlevel &
				    VIRTIO_INITLEVEL_SHUTDOWN) {
					/*
					 * The device was shut down while in a
					 * queue handler routine.
					 */
					break;
				}
			}
		}
	}

	mutex_exit(&vio->vio_mutex);

	/*
	 * vio_cfgchange_{handler,handlerarg} cannot change while interrupts
	 * are configured so it is safe to access them outside of the lock.
	 */

	if ((isr & VIRTIO_ISR_CHECK_CONFIG) != 0) {
		r = DDI_INTR_CLAIMED;
		if (vio->vio_cfgchange_handler != NULL) {
			(void) vio->vio_cfgchange_handler(
			    (caddr_t)vio->vio_cfgchange_handlerarg,
			    (caddr_t)vio);
		}
	}

	return (r);
}
1642
/*
 * Allocate interrupts for the device and attach handlers.  One interrupt is
 * requested per virtqueue with a registered handler, plus one for the
 * configuration change handler if present.  MSI-X is preferred; when only
 * fixed interrupts are available a single shared handler
 * (virtio_shared_isr) multiplexes all of the above.  "allow_types" may
 * restrict the permitted DDI interrupt types, or be VIRTIO_ANY_INTR_TYPE.
 */
static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	/*
	 * If there is a configuration change handler, one extra interrupt
	 * is needed for that.
	 */
	if (vio->vio_cfgchange_handler != NULL)
		count++;

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != VIRTIO_ANY_INTR_TYPE) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	/*
	 * MSI-X: assign vectors in order -- the configuration change handler
	 * (if any) takes the first vector, followed by one vector per queue
	 * with a registered handler.
	 */
	uint_t n = 0;

	/* Bind the configuration vector interrupt */
	if (vio->vio_cfgchange_handler != NULL) {
		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    vio->vio_cfgchange_handler,
		    (caddr_t)vio->vio_cfgchange_handlerarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "adding configuration change interrupt failed");
			goto fail;
		}
		vio->vio_cfgchange_handler_added = B_TRUE;
		vio->vio_cfgchange_handler_index = n;
		n++;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}
1816
/*
 * Fully unwind interrupt state: disable interrupts, remove any handlers that
 * were added (shared, configuration change, and per-queue), and free the
 * allocated interrupt handles.  Safe to call at any point of a partially
 * completed virtio_interrupts_setup().  The device mutex must be held.
 */
static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		/*
		 * Remove the configuration vector interrupt handler.
		 * When present it always occupies the first handle; see
		 * virtio_interrupts_setup().
		 */
		if (vio->vio_cfgchange_handler_added) {
			int r;

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN,
				    "removing configuration change interrupt "
				    "handler failed (%d)", r);
			}
			vio->vio_cfgchange_handler_added = B_FALSE;
		}

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}
1897
/*
 * Undo the effects of enabling interrupts: detach MSI-X vectors from the
 * queues and the configuration register, disable the interrupts themselves,
 * and (for the legacy interface) restore the device-specific configuration
 * offset.  The device mutex must be held.
 */
static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * Program "no vector" into each routing register that was
		 * previously assigned a real vector.
		 */
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			vio->vio_ops->vop_msix_queue_set(vio, viq->viq_index,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}

		if (vio->vio_cfgchange_handler_added) {
			vio->vio_ops->vop_msix_config_set(vio,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more in the legacy interface.
	 */
	if (!virtio_modern(vio))
		vio->vio_legacy_cfg_offset = VIRTIO_LEGACY_CFG_OFFSET;
}
1936
/*
 * Enable the previously allocated and attached interrupts.  For MSI-X this
 * also routes each queue (and the configuration change register, if used) to
 * its assigned vector, verifying that the device accepted each assignment.
 * Idempotent: returns DDI_SUCCESS immediately if already enabled.  On any
 * failure the enable is fully unwound and DDI_FAILURE (or the DDI error) is
 * returned.
 */
int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device. While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR in the legacy interface.
		 */
		if (!virtio_modern(vio)) {
			vio->vio_legacy_cfg_offset =
			    VIRTIO_LEGACY_CFG_OFFSET_MSIX;
		}

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			vio->vio_ops->vop_msix_queue_set(vio, qi, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program. We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (vio->vio_ops->vop_msix_queue_get(vio, qi) != msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}

		if (vio->vio_cfgchange_handler_added) {
			vio->vio_ops->vop_msix_config_set(vio,
			    vio->vio_cfgchange_handler_index);

			/* Verify the value was accepted. */
			if (vio->vio_ops->vop_msix_config_get(vio) !=
			    vio->vio_cfgchange_handler_index) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector for "
				    "configuration");

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}
2041
2042 static void
virtio_interrupts_disable_locked(virtio_t * vio)2043 virtio_interrupts_disable_locked(virtio_t *vio)
2044 {
2045 VERIFY(MUTEX_HELD(&vio->vio_mutex));
2046
2047 if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
2048 return;
2049 }
2050
2051 virtio_interrupts_unwind(vio);
2052
2053 vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
2054 }
2055
2056 void
virtio_interrupts_disable(virtio_t * vio)2057 virtio_interrupts_disable(virtio_t *vio)
2058 {
2059 mutex_enter(&vio->vio_mutex);
2060 virtio_interrupts_disable_locked(vio);
2061 mutex_exit(&vio->vio_mutex);
2062 }
2063
2064 /*
2065 * Map a PCI BAR (0-5) to a regset number.
2066 */
2067 static int
virtio_bar_to_rnumber(virtio_t * vio,uint8_t bar)2068 virtio_bar_to_rnumber(virtio_t *vio, uint8_t bar)
2069 {
2070 pci_regspec_t *regs;
2071 uint_t bar_offset, regs_length, rcount;
2072 int rnumber = -1;
2073
2074 if (bar > 5)
2075 return (-1);
2076
2077 /*
2078 * PCI_CONF_BASE0 is 0x10; each BAR is 4 bytes apart.
2079 */
2080 bar_offset = PCI_CONF_BASE0 + sizeof (uint32_t) * bar;
2081
2082 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, vio->vio_dip,
2083 DDI_PROP_DONTPASS, "reg", (int **)®s, ®s_length) !=
2084 DDI_PROP_SUCCESS) {
2085 return (-1);
2086 }
2087
2088 rcount = regs_length * sizeof (int) / sizeof (pci_regspec_t);
2089
2090 for (int i = 0; i < rcount; i++) {
2091 if (PCI_REG_REG_G(regs[i].pci_phys_hi) == bar_offset) {
2092 rnumber = i;
2093 break;
2094 }
2095 }
2096
2097 ddi_prop_free(regs);
2098
2099 return ((rnumber < rcount) ? rnumber : -1);
2100 }
2101