xref: /illumos-gate/usr/src/uts/common/io/virtio/virtio_main.c (revision 952a36939b6b7ca0726a71954c741eab4be19535)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Joyent, Inc.
14  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
15  * Copyright 2025 Oxide Computer Company
16  * Copyright 2026 Hans Rosenfeld
17  */
18 
19 /*
20  * VIRTIO FRAMEWORK
21  *
22  * For design and usage documentation, see the comments in "virtio.h".
23  */
24 
25 #include <sys/conf.h>
26 #include <sys/kmem.h>
27 #include <sys/debug.h>
28 #include <sys/modctl.h>
29 #include <sys/autoconf.h>
30 #include <sys/ddi_impldefs.h>
31 #include <sys/ddi.h>
32 #include <sys/inttypes.h>
33 #include <sys/sunddi.h>
34 #include <sys/sunndi.h>
35 #include <sys/avintr.h>
36 #include <sys/spl.h>
37 #include <sys/promif.h>
38 #include <sys/list.h>
39 #include <sys/bootconf.h>
40 #include <sys/bootsvcs.h>
41 #include <sys/sysmacros.h>
42 #include <sys/pci.h>
43 #include <sys/pci_cap.h>
44 #include <sys/stdbit.h>
45 
46 #include "virtio.h"
47 #include "virtio_impl.h"
48 #include "virtio_endian.h"
49 
50 
/*
 * Linkage structures
 *
 * This is a miscellaneous module: it supplies common routines to the virtio
 * leaf drivers rather than implementing a device driver of its own.
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops =			&mod_miscops,
	.misc_linkinfo =		"VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev =			MODREV_1,
	.ml_linkage =			{ &virtio_modlmisc, NULL }
};
63 
/*
 * Module entry point: install the module into the running kernel.
 */
int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}
69 
/*
 * Module exit point: remove the module; fails while consumers are loaded.
 */
int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}
75 
/*
 * Module information entry point.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}
81 
82 static void virtio_unmap_cap(virtio_t *, virtio_pci_cap_t *);
83 static boolean_t virtio_map_cap(virtio_t *, virtio_pci_cap_t *);
84 static void virtio_discover_pci_caps(virtio_t *, ddi_acc_handle_t);
85 static void virtio_set_status(virtio_t *, uint8_t);
86 static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
87     uint16_t);
88 static int virtio_interrupts_setup(virtio_t *, int);
89 static void virtio_interrupts_teardown(virtio_t *);
90 static void virtio_interrupts_disable_locked(virtio_t *);
91 static void virtio_queue_free(virtio_queue_t *);
92 static int virtio_bar_to_rnumber(virtio_t *, uint8_t);
93 
/*
 * Tuneable that forces use of the legacy interface even if the hypervisor
 * presents transitional devices. It has no effect if only a modern device is
 * presented.
 */
int virtio_force_legacy = 0;

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.  DDI_NEVERSWAP_ACC: data is accessed as-is; any byte
 * order conversion is performed explicitly by the callers (see the
 * "virtio_endian.h" helpers included above).
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version =		DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags =	DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder =	DDI_STORECACHING_OK_ACC,
	.devacc_attr_access =		DDI_DEFAULT_ACC
};
111 
112 
/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version =		DMA_ATTR_V0,
	.dma_attr_addr_lo =		0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi =		0x00000FFFFFFFF000,
	.dma_attr_count_max =		0x00000000FFFFFFFF,
	.dma_attr_align =		VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes =		1,
	.dma_attr_minxfer =		1,
	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
	.dma_attr_seg =			0x00000000FFFFFFFF,
	.dma_attr_sgllen =		1,
	.dma_attr_granular =		1,
	.dma_attr_flags =		0
};

/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.  Each
 * descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version =		DMA_ATTR_V0,
	.dma_attr_addr_lo =		0x0000000000000000,
	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max =		0x00000000FFFFFFFF,
	.dma_attr_align =		sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes =		1,
	.dma_attr_minxfer =		1,
	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
	.dma_attr_seg =			0x00000000FFFFFFFF,
	.dma_attr_sgllen =		1,
	.dma_attr_granular =		1,
	.dma_attr_flags =		0
};
156 
157 
/*
 * Final teardown of a virtio_t allocated by virtio_init().  If "failed" is
 * B_TRUE, the host is told that driver setup failed; otherwise the device is
 * reset.  All queues, interrupts, and register mappings are released and the
 * virtio_t itself is freed.
 */
void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	/*
	 * Remove interrupt handlers first so no handler can run while the
	 * queues are dismantled below.
	 */
	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);
	mutex_destroy(&vio->vio_qlock);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		vio->vio_ops->vop_set_status_locked(vio, VIRTIO_STATUS_FAILED);
	} else {
		vio->vio_ops->vop_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BARs.  "vio_bar" is only set for legacy devices;
		 * the capability mappings are only present for modern or
		 * transitional ones (virtio_unmap_cap() skips unmapped
		 * entries).
		 */
		if (vio->vio_bar != NULL)
			ddi_regs_map_free(&vio->vio_barh);

		virtio_unmap_cap(vio, &vio->vio_cap_common);
		virtio_unmap_cap(vio, &vio->vio_cap_notify);
		virtio_unmap_cap(vio, &vio->vio_cap_isr);
		virtio_unmap_cap(vio, &vio->vio_cap_device);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}
213 
214 /*
215  * Early device initialisation for virtio devices.
216  */
217 virtio_t *
virtio_init(dev_info_t * dip)218 virtio_init(dev_info_t *dip)
219 {
220 	/*
221 	 * First, let's see what kind of device this is.
222 	 */
223 	ddi_acc_handle_t pci;
224 	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
225 		dev_err(dip, CE_WARN, "pci_config_setup failed");
226 		return (NULL);
227 	}
228 
229 	uint16_t devid;
230 	if ((devid = pci_config_get16(pci, PCI_CONF_DEVID)) == PCI_EINVAL16) {
231 		dev_err(dip, CE_WARN, "could not read config space devid");
232 		pci_config_teardown(&pci);
233 		return (NULL);
234 	}
235 
236 	uint8_t revid;
237 	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
238 		dev_err(dip, CE_WARN, "could not read config space revid");
239 		pci_config_teardown(&pci);
240 		return (NULL);
241 	}
242 
243 	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
244 	vio->vio_dip = dip;
245 
246 	virtio_discover_pci_caps(vio, pci);
247 	pci_config_teardown(&pci);
248 
249 	/*
250 	 * In order to operate over the modern interface we must have found a
251 	 * minimum set of capabiities.
252 	 */
253 	boolean_t found_modern_caps =
254 	    (vio->vio_cap_common.vpc_type != 0 &&
255 	    vio->vio_cap_notify.vpc_type != 0 &&
256 	    vio->vio_cap_isr.vpc_type != 0 &&
257 	    vio->vio_cap_device.vpc_type != 0);
258 
259 	if (devid >= VIRTIO_MIN_MODERN_DEVID) {
260 		/*
261 		 * This is a purely "modern" device. If we haven't found the
262 		 * required PCI capabilities then we can't proceed.
263 		 */
264 		if (!found_modern_caps) {
265 			dev_err(dip, CE_WARN,
266 			    "Did not find required PCI capabilities for a "
267 			    " modern VirtIO device");
268 			kmem_free(vio, sizeof (*vio));
269 			return (NULL);
270 		}
271 
272 		/*
273 		 * There is nothing else that is mandatory for a modern device
274 		 * that we can check.
275 		 */
276 		vio->vio_mode = VIRTIO_MODE_MODERN;
277 		vio->vio_ops = &virtio_modern_ops;
278 	} else {
279 		/*
280 		 * This could be a pure "legacy" or a "transitional" device.
281 		 * In either case the specification requires that the device
282 		 * advertise as PCI Revision 0.
283 		 */
284 		if (revid != 0) {
285 			dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
286 			    "transitional or legacy virtio device",
287 			    (uint_t)revid);
288 			kmem_free(vio, sizeof (*vio));
289 			return (NULL);
290 		}
291 
292 		/*
293 		 * If we found the modern PCI capabilities then we're
294 		 * transitional, otherwise we're legacy. We will always
295 		 * choose to use the modern interfaces on a transitional
296 		 * device. Ostensibly the VIRTIO_F_VERSION_1 flag is intended
297 		 * to help with this decision, but it is only visible through
298 		 * the modern interface!
299 		 */
300 		if (found_modern_caps && virtio_force_legacy == 0) {
301 			vio->vio_mode = VIRTIO_MODE_TRANSITIONAL;
302 			vio->vio_ops = &virtio_modern_ops;
303 		} else {
304 			vio->vio_mode = VIRTIO_MODE_LEGACY;
305 			vio->vio_ops = &virtio_legacy_ops;
306 		}
307 	}
308 
309 	if (vio->vio_mode == VIRTIO_MODE_LEGACY) {
310 		int rnumber = virtio_bar_to_rnumber(vio, VIRTIO_LEGACY_BAR);
311 
312 		/*
313 		 * Map PCI BAR0 for legacy device access.
314 		 */
315 		if (rnumber == -1 || ddi_regs_map_setup(dip, rnumber,
316 		    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
317 		    &vio->vio_barh) != DDI_SUCCESS) {
318 			dev_err(dip, CE_WARN, "Failed to map BAR0");
319 			kmem_free(vio, sizeof (*vio));
320 			return (NULL);
321 		}
322 	} else {
323 		/*
324 		 * Map the BAR regions required for the modern interface.
325 		 */
326 		if (!virtio_map_cap(vio, &vio->vio_cap_common) ||
327 		    !virtio_map_cap(vio, &vio->vio_cap_notify) ||
328 		    !virtio_map_cap(vio, &vio->vio_cap_isr) ||
329 		    !virtio_map_cap(vio, &vio->vio_cap_device)) {
330 			kmem_free(vio, sizeof (*vio));
331 			return (NULL);
332 		}
333 	}
334 	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;
335 
336 	/*
337 	 * We initialise the mutex without an interrupt priority to ease the
338 	 * implementation of some of the configuration space access routines.
339 	 * Drivers using the virtio framework MUST make a call to
340 	 * "virtio_init_complete()" prior to spawning other threads or enabling
341 	 * interrupt handlers, at which time we will destroy and reinitialise
342 	 * the mutex for use in our interrupt handlers.
343 	 */
344 	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);
345 
346 	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
347 	    offsetof(virtio_queue_t, viq_link));
348 	mutex_init(&vio->vio_qlock, NULL, MUTEX_DRIVER, NULL);
349 	vio->vio_qcur = UINT16_MAX;
350 
351 	/*
352 	 * Virtio devices require a few common steps before we can negotiate
353 	 * device features.
354 	 */
355 	virtio_device_reset(vio);
356 	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
357 	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);
358 
359 	vio->vio_features_device = vio->vio_ops->vop_device_get_features(vio);
360 	vio->vio_features = vio->vio_features_device;
361 
362 	return (vio);
363 }
364 
365 boolean_t
virtio_init_features(virtio_t * vio,uint64_t driver_features,boolean_t allow_indirect)366 virtio_init_features(virtio_t *vio, uint64_t driver_features,
367     boolean_t allow_indirect)
368 {
369 	if (!virtio_modern(vio) && driver_features >> 32 != 0) {
370 		dev_err(vio->vio_dip, CE_WARN,
371 		    "driver programming error; high bits set in features");
372 		return (B_FALSE);
373 	}
374 
375 	if (allow_indirect)
376 		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
377 	if (virtio_modern(vio))
378 		driver_features |= VIRTIO_F_VERSION_1;
379 
380 	vio->vio_features &= driver_features;
381 
382 	if (!vio->vio_ops->vop_device_set_features(vio, vio->vio_features)) {
383 		dev_err(vio->vio_dip, CE_WARN, "feature negotiation failed");
384 		return (B_FALSE);
385 	}
386 
387 	/*
388 	 * With the legacy interface the device-specific configuration begins
389 	 * at an offset into the BAR that depends on whether we have enabled
390 	 * MSI-X interrupts or not. Start out with the offset for pre-MSI-X
391 	 * operation so that we can read device configuration space prior to
392 	 * configuring interrupts.
393 	 */
394 	if (!virtio_modern(vio))
395 		vio->vio_legacy_cfg_offset = VIRTIO_LEGACY_CFG_OFFSET;
396 
397 	return (B_TRUE);
398 }
399 
400 /*
401  * Some virtio devices can change their device configuration state at any
402  * time. This function may be called by the driver during the initialisation
403  * phase - before calling virtio_init_complete() - in order to register a
404  * handler function which will be called when the device configuration space
405  * is updated.
406  */
void
virtio_register_cfgchange_handler(virtio_t *vio, ddi_intr_handler_t *func,
    void *funcarg)
{
	/*
	 * The handler must be registered before interrupt handlers are added
	 * (i.e., before virtio_init_complete()), and may only be registered
	 * once.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));
	VERIFY(!vio->vio_cfgchange_handler_added);

	mutex_enter(&vio->vio_mutex);
	vio->vio_cfgchange_handler = func;
	vio->vio_cfgchange_handlerarg = funcarg;
	mutex_exit(&vio->vio_mutex);
}
419 
420 /*
421  * This function must be called by the driver once it has completed early setup
422  * calls.  The value of "allowed_interrupt_types" is a mask of interrupt types
423  * (DDI_INTR_TYPE_MSIX, etc) that we'll try to use when installing handlers, or
424  * the special value 0 to allow the system to use any available type.
425  */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	/*
	 * Interrupts are only needed if at least one queue was registered,
	 * or a configuration-change handler was installed.
	 */
	if (!list_is_empty(&vio->vio_queues) ||
	    vio->vio_cfgchange_handler != NULL) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * We can allocate the mutex once we know the priority.
	 * Reinitialise both the device mutex and every queue mutex at the
	 * interrupt priority established above, as promised in
	 * virtio_init().
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	/*
	 * Enable the queues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		vio->vio_ops->vop_queue_enable_set(vio, viq->viq_index, true);
	}

	/*
	 * Signal to the host that the driver is ready for normal operation.
	 */
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}
467 
468 boolean_t
virtio_features_present(virtio_t * vio,uint64_t feature_mask)469 virtio_features_present(virtio_t *vio, uint64_t feature_mask)
470 {
471 	return ((vio->vio_features & feature_mask) == feature_mask);
472 }
473 
/*
 * Return the negotiated feature set.
 *
 * NOTE(review): "vio_features" is 64 bits wide and, for modern devices, can
 * carry bits above 31 (virtio_init_features() ORs in VIRTIO_F_VERSION_1 for
 * modern devices, and rejects high bits only for legacy ones).  Returning
 * uint32_t silently truncates those bits — confirm against "virtio.h"
 * whether this should return uint64_t.
 */
uint32_t
virtio_features(virtio_t *vio)
{
	return (vio->vio_features);
}
479 
480 boolean_t
virtio_modern(virtio_t * vio)481 virtio_modern(virtio_t *vio)
482 {
483 	return (vio->vio_mode != VIRTIO_MODE_LEGACY);
484 }
485 
/*
 * Select queue "qidx" on the device and return with "vio_qlock" held; the
 * caller must drop it with virtio_releaseq().  The currently selected queue
 * is cached in "vio_qcur" to avoid redundant queue-select operations.
 */
void
virtio_acquireq(virtio_t *vio, uint16_t qidx)
{
	mutex_enter(&vio->vio_qlock);
	if (vio->vio_qcur != qidx) {
		vio->vio_ops->vop_queue_select(vio, qidx);
		vio->vio_qcur = qidx;
	}
}
495 
/*
 * Release the queue selection lock taken by virtio_acquireq().
 */
void
virtio_releaseq(virtio_t *vio)
{
	mutex_exit(&vio->vio_qlock);
}
501 
/*
 * Return the interrupt priority for this device in a form suitable for
 * mutex_init(9F).  Only valid once interrupt handlers have been added.
 */
void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}
509 
/*
 * Unmap the register region for a capability mapped by virtio_map_cap().
 * Safe to call for capabilities that were never discovered (vpc_type == 0)
 * or never mapped (vpc_bar == NULL).
 */
static void
virtio_unmap_cap(virtio_t *vio, virtio_pci_cap_t *cap)
{
	if (cap->vpc_type != 0 && cap->vpc_bar != NULL)
		ddi_regs_map_free(&cap->vpc_barh);
}
516 
517 static boolean_t
virtio_map_cap(virtio_t * vio,virtio_pci_cap_t * cap)518 virtio_map_cap(virtio_t *vio, virtio_pci_cap_t *cap)
519 {
520 	static uint8_t baridx = UINT8_MAX;
521 	static int rnumber = -1;
522 
523 	VERIFY(cap->vpc_type);
524 
525 	/*
526 	 * With most hypervisors all of the capabilities point to the same BAR
527 	 * so we can cache and re-use the corresponding register number.
528 	 * This function is only called serially from `virtio_init` during
529 	 * driver attach so it is safe to use static locals.
530 	 */
531 	if (baridx != cap->vpc_baridx) {
532 		baridx = cap->vpc_baridx;
533 		rnumber = virtio_bar_to_rnumber(vio, baridx);
534 	}
535 
536 	if (rnumber == -1 || ddi_regs_map_setup(vio->vio_dip, rnumber,
537 	    (caddr_t *)&cap->vpc_bar, cap->vpc_offset, cap->vpc_size,
538 	    &virtio_acc_attr, &cap->vpc_barh) != DDI_SUCCESS) {
539 		dev_err(vio->vio_dip, CE_WARN,
540 		    "Failed to map CAP %u @ "
541 		    "BAR%u 0x%" PRIx64 "+%" PRIx64,
542 		    cap->vpc_type, cap->vpc_baridx,
543 		    cap->vpc_offset, cap->vpc_size);
544 		return (B_FALSE);
545 	}
546 
547 	return (B_TRUE);
548 }
549 
550 /*
551  * Devices which are capable of operating via the "modern" VirtIO interface,
552  * which includes "transitional" devices, present a number of PCI capabilities
553  * of the vendor-specific type.
554  */
555 static void
virtio_discover_pci_caps(virtio_t * vio,ddi_acc_handle_t pci)556 virtio_discover_pci_caps(virtio_t *vio, ddi_acc_handle_t pci)
557 {
558 	uint16_t idx;
559 
560 	for (idx = 0; ; idx++) {
561 		virtio_pci_cap_t *cap;
562 		uint16_t base;
563 		uint32_t id;
564 
565 		if (pci_cap_probe(pci, idx, &id, &base) != DDI_SUCCESS)
566 			break;
567 
568 		/* The VirtIO caps are all of the "vendor-specific" type */
569 		if (id != PCI_CAP_ID_VS)
570 			continue;
571 
572 		uint8_t type = pci_cap_get(pci, PCI_CAP_CFGSZ_8, idx, base,
573 		    VIRTIO_PCI_CAP_TYPE);
574 
575 		uint8_t min_len = VIRTIO_PCI_CAP_BARLEN + sizeof (uint32_t);
576 
577 		/* We are currently only interested in the following types */
578 		switch (type) {
579 		case VPC_COMMON_CFG:
580 			cap = &vio->vio_cap_common;
581 			break;
582 		case VPC_NOTIFY_CFG:
583 			cap = &vio->vio_cap_notify;
584 			/* The notify capability has an extra field */
585 			min_len += sizeof (uint32_t);
586 			break;
587 		case VPC_ISR_CFG:
588 			cap = &vio->vio_cap_isr;
589 			break;
590 		case VPC_DEVICE_CFG:
591 			cap = &vio->vio_cap_device;
592 			break;
593 		default:
594 			/* Not interested in this cap */
595 			continue;
596 		}
597 
598 		uint8_t caplen = pci_cap_get(pci, PCI_CAP_CFGSZ_8, idx, base,
599 		    VIRTIO_PCI_CAP_LEN);
600 
601 		/* Skip short capabilities */
602 		if (caplen == PCI_EINVAL8 || caplen < min_len)
603 			continue;
604 
605 		/*
606 		 * Devices can provide multiple versions of the same capability
607 		 * type which should be in order of preference. We skip
608 		 * duplicates and use the first instance of each type we find.
609 		 */
610 		if (cap->vpc_type != 0)
611 			continue;
612 
613 		cap->vpc_baridx = pci_cap_get(pci, PCI_CAP_CFGSZ_8, idx, base,
614 		    VIRTIO_PCI_CAP_BAR);
615 		if (cap->vpc_type == PCI_EINVAL8)
616 			continue;
617 		cap->vpc_offset = pci_cap_get(pci, PCI_CAP_CFGSZ_32, idx, base,
618 		    VIRTIO_PCI_CAP_BAROFF);
619 		if (cap->vpc_offset == PCI_EINVAL32)
620 			continue;
621 		cap->vpc_size = pci_cap_get(pci, PCI_CAP_CFGSZ_32, idx, base,
622 		    VIRTIO_PCI_CAP_BARLEN);
623 		if (cap->vpc_size == PCI_EINVAL32)
624 			continue;
625 
626 		/*
627 		 * The NOTIFY_CFG capability has an additional field which is
628 		 * the multiplier to use to find the correct offset in the BAR
629 		 * for each queue. It is permissable for this to be 0, in which
630 		 * case notifications for all queues are written to the start
631 		 * of the region.
632 		 */
633 		if (type == VPC_NOTIFY_CFG) {
634 			vio->vio_multiplier = pci_cap_get(pci, PCI_CAP_CFGSZ_32,
635 			    idx, base, VIRTIO_PCI_CAP_MULTIPLIER);
636 			if (vio->vio_multiplier == PCI_EINVAL32)
637 				continue;
638 		}
639 
640 		/* Assigning the type marks this entry as valid */
641 		cap->vpc_type = type;
642 	}
643 }
644 
645 /*
646  * Enable a bit in the device status register.  Each bit signals a level of
647  * guest readiness to the host.  Use the VIRTIO_CONFIG_DEVICE_STATUS_*
648  * constants for "status".  To zero the status field use virtio_device_reset().
649  */
static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	/* Serialise status register access through "vio_mutex". */
	mutex_enter(&vio->vio_mutex);
	vio->vio_ops->vop_set_status_locked(vio, status);
	mutex_exit(&vio->vio_mutex);
}
657 
/*
 * Reset the device, zeroing the status field (see the comment above
 * virtio_set_status()).
 */
void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	vio->vio_ops->vop_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}
665 
666 /*
667  * Some queues are effectively long-polled; the driver submits a series of
668  * buffers and the device only returns them when there is data available.
669  * During detach, we need to coordinate the return of these buffers.  Calling
670  * "virtio_shutdown()" will reset the device, then allow the removal of all
671  * buffers that were in flight at the time of shutdown via
672  * "virtio_queue_evacuate()".
673  */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already; this routine is
		 * idempotent.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown.  This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device.  This removes any queue configuration on the
	 * device side.
	 */
	vio->vio_ops->vop_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}
705 
706 /*
707  * Common implementation of quiesce(9E) for simple Virtio-based devices.
708  */
int
virtio_quiesce(virtio_t *vio)
{
	/*
	 * No locks are taken here: quiesce(9E) runs in a restricted context
	 * and "vio_initlevel" is only read.
	 */
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it.  All queue configuration is
	 * discarded.  This is good enough for quiesce(9E).
	 */
	vio->vio_ops->vop_device_reset_locked(vio);

	return (DDI_SUCCESS);
}
728 
729 /*
730  * DEVICE-SPECIFIC REGISTER ACCESS
731  *
732  * Note that these functions take the mutex to avoid racing with interrupt
733  * enable/disable, when the device-specific offset can potentially change.
734  */
735 
/* Read the device configuration generation value. */
uint8_t
virtio_dev_getgen(virtio_t *vio)
{
	return (vio->vio_ops->vop_device_cfg_gen(vio));
}

/* 8-bit read from device-specific configuration at "offset". */
uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get8(vio, offset));
}

/* 16-bit read from device-specific configuration at "offset". */
uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get16(vio, offset));
}

/* 32-bit read from device-specific configuration at "offset". */
uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get32(vio, offset));
}

/* 64-bit read from device-specific configuration at "offset". */
uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	return (vio->vio_ops->vop_device_cfg_get64(vio, offset));
}

/* 8-bit write to device-specific configuration at "offset". */
void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	vio->vio_ops->vop_device_cfg_put8(vio, offset, value);
}

/* 16-bit write to device-specific configuration at "offset". */
void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	vio->vio_ops->vop_device_cfg_put16(vio, offset, value);
}

/* 32-bit write to device-specific configuration at "offset". */
void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	vio->vio_ops->vop_device_cfg_put32(vio, offset, value);
}
783 
784 /*
785  * VIRTQUEUE MANAGEMENT
786  */
787 
788 static int
virtio_inflight_compar(const void * lp,const void * rp)789 virtio_inflight_compar(const void *lp, const void *rp)
790 {
791 	const virtio_chain_t *l = lp;
792 	const virtio_chain_t *r = rp;
793 
794 	if (l->vic_head < r->vic_head) {
795 		return (-1);
796 	} else if (l->vic_head > r->vic_head) {
797 		return (1);
798 	} else {
799 		return (0);
800 	}
801 }
802 
803 virtio_queue_t *
virtio_queue_alloc(virtio_t * vio,uint16_t qidx,const char * name,ddi_intr_handler_t * func,void * funcarg,boolean_t force_direct,uint_t max_segs)804 virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
805     ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
806     uint_t max_segs)
807 {
808 	char space_name[256];
809 	uint64_t noff = 0;
810 	uint16_t qsz;
811 
812 	if (max_segs < 1) {
813 		/*
814 		 * Every descriptor, direct or indirect, needs to refer to at
815 		 * least one buffer.
816 		 */
817 		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
818 		    "segment count must be at least 1", name, (uint_t)qidx);
819 		return (NULL);
820 	}
821 
822 	mutex_enter(&vio->vio_mutex);
823 
824 	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
825 		/*
826 		 * Cannot configure any more queues once initial setup is
827 		 * complete and interrupts have been allocated.
828 		 */
829 		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
830 		    "alloc after init complete", name, (uint_t)qidx);
831 		mutex_exit(&vio->vio_mutex);
832 		return (NULL);
833 	}
834 
835 	qsz = vio->vio_ops->vop_queue_size_get(vio, qidx);
836 	if (qsz == 0) {
837 		/*
838 		 * A size of zero means the device does not have a queue with
839 		 * this index.
840 		 */
841 		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
842 		    "does not exist on device", name, (uint_t)qidx);
843 		mutex_exit(&vio->vio_mutex);
844 		return (NULL);
845 	}
846 	/*
847 	 * There is no way to negotiate a different queue size for legacy
848 	 * devices.  We must read and use the native queue size of the device.
849 	 * For devices using the modern interface we could choose to reduce
850 	 * the queue size; for now we write back the value advertised by the
851 	 * device unchanged.
852 	 */
853 	if (vio->vio_ops->vop_queue_size_set != NULL)
854 		vio->vio_ops->vop_queue_size_set(vio, qidx, qsz);
855 
856 	if (virtio_modern(vio)) {
857 		noff = vio->vio_ops->vop_queue_noff_get(vio, qidx);
858 		if (noff > vio->vio_cap_notify.vpc_size - sizeof (uint32_t)) {
859 			dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
860 			    "invalid notification offset 0x%" PRIx64 " "
861 			    "for notify region of size 0x%" PRIx64,
862 			    name, (uint_t)qidx,
863 			    noff, vio->vio_cap_notify.vpc_size);
864 			return (NULL);
865 		}
866 	}
867 
868 	mutex_exit(&vio->vio_mutex);
869 
870 	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
871 	viq->viq_virtio = vio;
872 	viq->viq_name = name;
873 	viq->viq_index = qidx;
874 	viq->viq_size = qsz;
875 	viq->viq_noff = noff;
876 	viq->viq_func = func;
877 	viq->viq_funcarg = funcarg;
878 	viq->viq_max_segs = max_segs;
879 	avl_create(&viq->viq_inflight, virtio_inflight_compar,
880 	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));
881 
882 	/*
883 	 * Allocate the mutex without an interrupt priority for now, as we do
884 	 * with "vio_mutex".  We'll reinitialise it in
885 	 * "virtio_init_complete()".
886 	 */
887 	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);
888 
889 	if (virtio_features_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
890 	    !force_direct) {
891 		/*
892 		 * If we were able to negotiate the indirect descriptor
893 		 * feature, and the caller has not explicitly forced the use of
894 		 * direct descriptors, we'll allocate indirect descriptor lists
895 		 * for each chain.
896 		 */
897 		viq->viq_indirect = B_TRUE;
898 	}
899 
900 	/*
901 	 * Track descriptor usage in an identifier space.
902 	 */
903 	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
904 	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
905 	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
906 		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
907 		    "ID space");
908 		virtio_queue_free(viq);
909 		return (NULL);
910 	}
911 
912 	/*
913 	 * For legacy devices, memory for the queue has a strict layout
914 	 * determined by the queue size, and with the device region
915 	 * starting on a fresh page. Modern and transitional devices have less
916 	 * stringent alignment requirements and virtqueues are more compact as
917 	 * a result.
918 	 */
919 	const uint_t align = virtio_modern(vio) ? MODERN_VQ_ALIGN :
920 	    VIRTIO_PAGE_SIZE;
921 
922 	const size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
923 	const size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
924 	    sizeof (virtio_vq_driver_t) +
925 	    sizeof (uint16_t) * qsz,
926 	    align, size_t);
927 	const size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
928 	    sizeof (virtio_vq_elem_t) * qsz,
929 	    align, size_t);
930 
931 	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
932 	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
933 	    KM_SLEEP) != DDI_SUCCESS) {
934 		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
935 		    "DMA memory");
936 		virtio_queue_free(viq);
937 		return (NULL);
938 	}
939 
940 	/*
941 	 * NOTE: The viq_dma_* members below are used by
942 	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
943 	 * offsets into the DMA allocation for partial synchronisation.  If the
944 	 * ordering of, or relationship between, these pointers changes, the
945 	 * macros must be kept in sync.
946 	 */
947 	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
948 	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
949 	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);
950 
951 	/*
952 	 * Install in the per-device list of queues.
953 	 */
954 	mutex_enter(&vio->vio_mutex);
955 	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
956 	    chkvq = list_next(&vio->vio_queues, chkvq)) {
957 		if (chkvq->viq_index == qidx) {
958 			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
959 			    "queue \"%s\" with same index (%d) as queue \"%s\"",
960 			    name, qidx, chkvq->viq_name);
961 			mutex_exit(&vio->vio_mutex);
962 			virtio_queue_free(viq);
963 			return (NULL);
964 		}
965 	}
966 	list_insert_tail(&vio->vio_queues, viq);
967 
968 	/*
969 	 * Ensure the zeroing of the queue memory is visible to the host before
970 	 * we inform the device of the queue address.
971 	 */
972 	membar_producer();
973 	VIRTQ_DMA_SYNC_FORDEV(viq);
974 
975 	const uint64_t pa = virtio_dma_cookie_pa(&viq->viq_dma, 0);
976 	vio->vio_ops->vop_queue_addr_set(vio, qidx,
977 	    pa, pa + sz_descs, pa + sz_driver);
978 
979 	mutex_exit(&vio->vio_mutex);
980 	return (viq);
981 }
982 
/*
 * Tear down a queue: detach it from the device (unless the device has
 * already been reset by virtio_shutdown()), release its DMA memory and
 * descriptor ID space, and free the tracking structure itself.
 */
static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		vio->vio_ops->vop_queue_enable_set(vio, viq->viq_index, false);
		vio->vio_ops->vop_queue_addr_set(vio, viq->viq_index, 0, 0, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	/* The caller must have drained all in-flight chains already. */
	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}
1018 
1019 void
virtio_queue_no_interrupt(virtio_queue_t * viq,boolean_t stop_interrupts)1020 virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
1021 {
1022 	mutex_enter(&viq->viq_mutex);
1023 
1024 	if (stop_interrupts) {
1025 		viq->viq_dma_driver->vqdr_flags |=
1026 		    viq_gtoh16(viq, VIRTQ_AVAIL_F_NO_INTERRUPT);
1027 	} else {
1028 		viq->viq_dma_driver->vqdr_flags &=
1029 		    viq_gtoh16(viq, ~VIRTQ_AVAIL_F_NO_INTERRUPT);
1030 	}
1031 	VIRTQ_DMA_SYNC_FORDEV(viq);
1032 
1033 	mutex_exit(&viq->viq_mutex);
1034 }
1035 
1036 static virtio_chain_t *
virtio_queue_complete(virtio_queue_t * viq,uint_t index)1037 virtio_queue_complete(virtio_queue_t *viq, uint_t index)
1038 {
1039 	VERIFY(MUTEX_HELD(&viq->viq_mutex));
1040 
1041 	virtio_chain_t *vic;
1042 
1043 	virtio_chain_t search;
1044 	bzero(&search, sizeof (search));
1045 	search.vic_head = index;
1046 
1047 	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
1048 		return (NULL);
1049 	}
1050 	avl_remove(&viq->viq_inflight, vic);
1051 
1052 	return (vic);
1053 }
1054 
1055 uint_t
virtio_queue_size(virtio_queue_t * viq)1056 virtio_queue_size(virtio_queue_t *viq)
1057 {
1058 	return (viq->viq_size);
1059 }
1060 
1061 uint_t
virtio_queue_nactive(virtio_queue_t * viq)1062 virtio_queue_nactive(virtio_queue_t *viq)
1063 {
1064 	mutex_enter(&viq->viq_mutex);
1065 	uint_t r = avl_numnodes(&viq->viq_inflight);
1066 	mutex_exit(&viq->viq_mutex);
1067 
1068 	return (r);
1069 }
1070 
/*
 * Poll the queue for a single completed chain.  Returns NULL if the queue
 * has been shut down, or if the device has not returned any new chains
 * since the last poll; otherwise returns the completed chain with its
 * received length recorded.
 */
virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted.  Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	uint16_t dindex = viq_htog16(viq, viq->viq_dma_device->vqde_index);
	if (viq->viq_device_index == dindex) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and any referenced memory by the descriptor occur after we have read
	 * the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	/*
	 * viq_device_index is free-running; reduce it modulo the queue size
	 * to locate the next device ring slot to consume.
	 */
	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq_htog16(viq,
	    viq->viq_dma_device->vqde_ring[index].vqe_start);
	uint32_t len = viq_htog32(viq,
	    viq->viq_dma_device->vqde_ring[index].vqe_len);

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}
1126 
1127 /*
1128  * After a call to "virtio_shutdown()", the driver must retrieve any previously
1129  * submitted chains and free any associated resources.
1130  */
1131 virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t * viq)1132 virtio_queue_evacuate(virtio_queue_t *viq)
1133 {
1134 	virtio_t *vio = viq->viq_virtio;
1135 
1136 	mutex_enter(&vio->vio_mutex);
1137 	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
1138 		dev_err(vio->vio_dip, CE_PANIC,
1139 		    "virtio_queue_evacuate() without virtio_shutdown()");
1140 	}
1141 	mutex_exit(&vio->vio_mutex);
1142 
1143 	mutex_enter(&viq->viq_mutex);
1144 	VERIFY(viq->viq_shutdown);
1145 
1146 	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
1147 	if (vic != NULL) {
1148 		avl_remove(&viq->viq_inflight, vic);
1149 	}
1150 
1151 	mutex_exit(&viq->viq_mutex);
1152 
1153 	return (vic);
1154 }
1155 
1156 /*
1157  * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
1158  */
1159 
1160 /*
1161  * When the device returns a descriptor chain to the driver, it may provide the
1162  * length in bytes of data written into the chain.  Client drivers should use
1163  * this value with care; the specification suggests some device implementations
1164  * have not always provided a useful or correct value.
1165  */
1166 size_t
virtio_chain_received_length(virtio_chain_t * vic)1167 virtio_chain_received_length(virtio_chain_t *vic)
1168 {
1169 	return (vic->vic_received_length);
1170 }
1171 
1172 /*
1173  * Allocate a descriptor chain for use with this queue.  The "kmflags" value
1174  * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
1175  */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue.  We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	/*
	 * NOTE(review): this relies on virtio_dma_fini() being safe to call
	 * on a zeroed, never-initialised DMA handle (vic was kmem_zalloc'd)
	 * when virtio_dma_init() itself was the failing call -- confirm.
	 */
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}
1254 
1255 void *
virtio_chain_data(virtio_chain_t * vic)1256 virtio_chain_data(virtio_chain_t *vic)
1257 {
1258 	return (vic->vic_data);
1259 }
1260 
1261 void
virtio_chain_data_set(virtio_chain_t * vic,void * data)1262 virtio_chain_data_set(virtio_chain_t *vic, void *data)
1263 {
1264 	vic->vic_data = data;
1265 }
1266 
1267 void
virtio_chain_clear(virtio_chain_t * vic)1268 virtio_chain_clear(virtio_chain_t *vic)
1269 {
1270 	if (vic->vic_indirect_capacity != 0) {
1271 		/*
1272 		 * There should only be one direct descriptor, which points at
1273 		 * our indirect descriptor list.  We don't want to clear it
1274 		 * here.
1275 		 */
1276 		VERIFY3U(vic->vic_direct_capacity, ==, 1);
1277 
1278 		if (vic->vic_indirect_used > 0) {
1279 			/*
1280 			 * Clear out the indirect descriptor table.
1281 			 */
1282 			vic->vic_indirect_used = 0;
1283 			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
1284 			    virtio_dma_size(&vic->vic_indirect_dma));
1285 		}
1286 
1287 	} else if (vic->vic_direct_capacity > 0) {
1288 		/*
1289 		 * Release any descriptors that were assigned to us previously.
1290 		 */
1291 		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
1292 			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
1293 			vic->vic_direct[i] = 0;
1294 		}
1295 		vic->vic_direct_used = 0;
1296 	}
1297 }
1298 
1299 void
virtio_chain_free(virtio_chain_t * vic)1300 virtio_chain_free(virtio_chain_t *vic)
1301 {
1302 	/*
1303 	 * First ensure that we have released any descriptors used by this
1304 	 * chain.
1305 	 */
1306 	virtio_chain_clear(vic);
1307 
1308 	if (vic->vic_indirect_capacity > 0) {
1309 		/*
1310 		 * Release the direct descriptor that points to our indirect
1311 		 * descriptor list.
1312 		 */
1313 		VERIFY3U(vic->vic_direct_capacity, ==, 1);
1314 		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);
1315 
1316 		virtio_dma_fini(&vic->vic_indirect_dma);
1317 	}
1318 
1319 	size_t vicsz = sizeof (*vic) +
1320 	    vic->vic_direct_capacity * sizeof (uint16_t);
1321 
1322 	kmem_free(vic, vicsz);
1323 }
1324 
/*
 * Allocate a free descriptor table index from the queue's ID space.
 * Returns 0 on success with the index in *indexp, or ENOMEM if all
 * descriptors are currently in use.
 */
static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	/*
	 * NOTE(review): a valid descriptor table index should be strictly
	 * less than viq_size, so this looks like it should be "<" rather
	 * than "<=" -- confirm against the id_space_create() bounds used
	 * when viq_descmap was created (not visible here).
	 */
	VERIFY3S(index, <=, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}
1340 
/*
 * Append one descriptor (physical address, length, flags) to a chain,
 * linking it onto the chain's previous descriptor via VIRTQ_DESC_F_NEXT.
 * Uses the chain's indirect table when one is present; otherwise a direct
 * descriptor is allocated from the queue-wide table.  Returns DDI_SUCCESS,
 * or DDI_FAILURE if the chain is full or no direct descriptor is free.
 */
static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |=
			    viq_gtoh16(viq, VIRTQ_DESC_F_NEXT);
			vqd[index - 1].vqd_next = viq_gtoh16(viq, index);
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry.  Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |=
			    viq_gtoh16(viq, VIRTQ_DESC_F_NEXT);
			vqd[p].vqd_next = viq_gtoh16(viq, index);
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	/*
	 * Fill in the new descriptor, converting each field to the guest
	 * byte order expected by this device.
	 */
	vqd[index].vqd_addr = viq_gtoh64(viq, pa);
	vqd[index].vqd_len = viq_gtoh32(viq, len);
	vqd[index].vqd_flags = viq_gtoh16(viq, flags);
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}
1410 
1411 int
virtio_chain_append(virtio_chain_t * vic,uint64_t pa,size_t len,virtio_direction_t dir)1412 virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
1413     virtio_direction_t dir)
1414 {
1415 	virtio_queue_t *viq = vic->vic_vq;
1416 	uint16_t flags = 0;
1417 
1418 	switch (dir) {
1419 	case VIRTIO_DIR_DEVICE_WRITES:
1420 		flags |= VIRTQ_DESC_F_WRITE;
1421 		break;
1422 
1423 	case VIRTIO_DIR_DEVICE_READS:
1424 		break;
1425 
1426 	default:
1427 		panic("unknown direction value %u", dir);
1428 	}
1429 
1430 	mutex_enter(&viq->viq_mutex);
1431 	int r = virtio_chain_append_impl(vic, pa, len, flags);
1432 	mutex_exit(&viq->viq_mutex);
1433 
1434 	return (r);
1435 }
1436 
/*
 * Publish all chains submitted so far by advancing the driver ring index,
 * then notify the device unless it has asked for notifications to be
 * suppressed (VIRTQ_USED_F_NO_NOTIFY).
 */
static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index =
	    viq_gtoh16(viq, viq->viq_driver_index);
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags &
	    viq_gtoh16(viq, VIRTQ_USED_F_NO_NOTIFY))) {
		vio->vio_ops->vop_queue_notify(viq);
	}
}
1464 
1465 void
virtio_queue_flush(virtio_queue_t * viq)1466 virtio_queue_flush(virtio_queue_t *viq)
1467 {
1468 	mutex_enter(&viq->viq_mutex);
1469 	virtio_queue_flush_locked(viq);
1470 	mutex_exit(&viq->viq_mutex);
1471 }
1472 
/*
 * Place a populated chain onto the driver-owned (available) ring and
 * track it as in-flight.  The chain is not visible to the device until
 * the queue is flushed, either here (flush == B_TRUE) or by a later
 * virtio_queue_flush().
 */
void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue.  The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len = viq_gtoh32(viq,
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used);

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain.  The updated value of viq_driver_index is not yet visible to
	 * the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] =
	    viq_gtoh16(viq, vic->vic_direct[0]);

	/*
	 * Key the chain by its head descriptor so completion processing can
	 * find it again.
	 */
	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}
1514 
1515 /*
1516  * INTERRUPTS MANAGEMENT
1517  */
1518 
1519 static const char *
virtio_interrupt_type_name(int type)1520 virtio_interrupt_type_name(int type)
1521 {
1522 	switch (type) {
1523 	case DDI_INTR_TYPE_MSIX:
1524 		return ("MSI-X");
1525 	case DDI_INTR_TYPE_MSI:
1526 		return ("MSI");
1527 	case DDI_INTR_TYPE_FIXED:
1528 		return ("fixed");
1529 	default:
1530 		return ("?");
1531 	}
1532 }
1533 
/*
 * Allocate "nrequired" interrupts of the given DDI type for this device.
 * On success, the handles are recorded in vio_interrupts and the
 * INT_ALLOC init level is set.  Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	/*
	 * DDI_INTR_ALLOC_STRICT: we want exactly nrequired interrupts or
	 * none at all.
	 */
	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}
1586 
/*
 * Shared (fixed/INTx) interrupt handler.  Reads the ISR status register --
 * which clears it as a side effect -- and dispatches to every registered
 * per-queue handler and, if present, the configuration change handler.
 * Returns DDI_INTR_CLAIMED if the interrupt was ours.
 */
static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us.  Reading
	 * this field resets it to zero.
	 */
	isr = vio->vio_ops->vop_isr_status(vio);

	if ((isr & VIRTIO_ISR_CHECK_QUEUES) != 0) {
		r = DDI_INTR_CLAIMED;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (viq->viq_func != NULL) {
				/*
				 * Drop the device lock while calling into
				 * the client driver's handler.
				 */
				mutex_exit(&vio->vio_mutex);
				(void) viq->viq_func(viq->viq_funcarg, arg0);
				mutex_enter(&vio->vio_mutex);

				if (vio->vio_initlevel &
				    VIRTIO_INITLEVEL_SHUTDOWN) {
					/*
					 * The device was shut down while in a
					 * queue handler routine.
					 */
					break;
				}
			}
		}
	}

	mutex_exit(&vio->vio_mutex);

	/*
	 * vio_cfgchange_{handler,handlerarg} cannot change while interrupts
	 * are configured so it is safe to access them outside of the lock.
	 */

	if ((isr & VIRTIO_ISR_CHECK_CONFIG) != 0) {
		r = DDI_INTR_CLAIMED;
		if (vio->vio_cfgchange_handler != NULL) {
			(void) vio->vio_cfgchange_handler(
			    (caddr_t)vio->vio_cfgchange_handlerarg,
			    (caddr_t)vio);
		}
	}

	return (r);
}
1642 
/*
 * Allocate interrupts and add handlers for every queue that registered
 * one, plus the configuration change handler if present.  MSI-X is
 * preferred, with a single shared fixed interrupt as the fallback;
 * "allow_types" may restrict the candidate types, or be
 * VIRTIO_ANY_INTR_TYPE.  Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	/*
	 * If there is a configuration change handler, one extra interrupt
	 * is needed for that.
	 */
	if (vio->vio_cfgchange_handler != NULL)
		count++;

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != VIRTIO_ANY_INTR_TYPE) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;

	/* Bind the configuration vector interrupt */
	if (vio->vio_cfgchange_handler != NULL) {
		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    vio->vio_cfgchange_handler,
		    (caddr_t)vio->vio_cfgchange_handlerarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "adding configuration change interrupt failed");
			goto fail;
		}
		vio->vio_cfgchange_handler_added = B_TRUE;
		vio->vio_cfgchange_handler_index = n;
		n++;
	}

	/*
	 * Assign the remaining vectors, one per queue with a registered
	 * handler, in list order.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}
1816 
/*
 * Undo virtio_interrupts_setup(): disable interrupts if enabled, remove
 * every added handler, and free the allocated interrupt handles, clearing
 * the INT_ADDED and INT_ALLOC init levels.
 */
static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		/*
		 * Remove the configuration vector interrupt handler.
		 */
		if (vio->vio_cfgchange_handler_added) {
			int r;

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN,
				    "removing configuration change interrupt "
				    "handler failed (%d)", r);
			}
			vio->vio_cfgchange_handler_added = B_FALSE;
		}

		/*
		 * Remove each per-queue handler that was added.
		 */
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}
1897 
/*
 * Quiesce interrupt delivery: for MSI-X, detach the vectors from each
 * queue and from the configuration space; then disable the interrupt
 * handles themselves.  Used both on explicit disable and to unwind a
 * partially successful enable.
 */
static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			vio->vio_ops->vop_msix_queue_set(vio, viq->viq_index,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}

		if (vio->vio_cfgchange_handler_added) {
			vio->vio_ops->vop_msix_config_set(vio,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more in the legacy interface.
	 */
	if (!virtio_modern(vio))
		vio->vio_legacy_cfg_offset = VIRTIO_LEGACY_CFG_OFFSET;
}
1936 
/*
 * Enable all allocated interrupts and, for MSI-X, program the device's
 * queue and configuration vectors, verifying that the device accepted
 * each assignment.  Idempotent: returns DDI_SUCCESS immediately if
 * interrupts are already enabled.  On failure, any partial enablement is
 * unwound.
 */
int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device.  While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR in the legacy interface.
		 */
		if (!virtio_modern(vio)) {
			vio->vio_legacy_cfg_offset =
			    VIRTIO_LEGACY_CFG_OFFSET_MSIX;
		}

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			vio->vio_ops->vop_msix_queue_set(vio, qi, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program.  We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (vio->vio_ops->vop_msix_queue_get(vio, qi) != msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}

		if (vio->vio_cfgchange_handler_added) {
			vio->vio_ops->vop_msix_config_set(vio,
			    vio->vio_cfgchange_handler_index);

			/* Verify the value was accepted. */
			if (vio->vio_ops->vop_msix_config_get(vio) !=
			    vio->vio_cfgchange_handler_index) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector for "
				    "configuration");

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}
2041 
2042 static void
virtio_interrupts_disable_locked(virtio_t * vio)2043 virtio_interrupts_disable_locked(virtio_t *vio)
2044 {
2045 	VERIFY(MUTEX_HELD(&vio->vio_mutex));
2046 
2047 	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
2048 		return;
2049 	}
2050 
2051 	virtio_interrupts_unwind(vio);
2052 
2053 	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
2054 }
2055 
2056 void
virtio_interrupts_disable(virtio_t * vio)2057 virtio_interrupts_disable(virtio_t *vio)
2058 {
2059 	mutex_enter(&vio->vio_mutex);
2060 	virtio_interrupts_disable_locked(vio);
2061 	mutex_exit(&vio->vio_mutex);
2062 }
2063 
2064 /*
2065  * Map a PCI BAR (0-5) to a regset number.
2066  */
2067 static int
virtio_bar_to_rnumber(virtio_t * vio,uint8_t bar)2068 virtio_bar_to_rnumber(virtio_t *vio, uint8_t bar)
2069 {
2070 	pci_regspec_t *regs;
2071 	uint_t bar_offset, regs_length, rcount;
2072 	int rnumber = -1;
2073 
2074 	if (bar > 5)
2075 		return (-1);
2076 
2077 	/*
2078 	 * PCI_CONF_BASE0 is 0x10; each BAR is 4 bytes apart.
2079 	 */
2080 	bar_offset = PCI_CONF_BASE0 + sizeof (uint32_t) * bar;
2081 
2082 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, vio->vio_dip,
2083 	    DDI_PROP_DONTPASS, "reg", (int **)&regs, &regs_length) !=
2084 	    DDI_PROP_SUCCESS) {
2085 		return (-1);
2086 	}
2087 
2088 	rcount = regs_length * sizeof (int) / sizeof (pci_regspec_t);
2089 
2090 	for (int i = 0; i < rcount; i++) {
2091 		if (PCI_REG_REG_G(regs[i].pci_phys_hi) == bar_offset) {
2092 			rnumber = i;
2093 			break;
2094 		}
2095 	}
2096 
2097 	ddi_prop_free(regs);
2098 
2099 	return ((rnumber < rcount) ? rnumber : -1);
2100 }
2101