xref: /linux/kernel/irq/msi.c (revision a9fc2304972b1db28b88af8203dffef23e1e92ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2014 Intel Corp.
4  * Author: Jiang Liu <jiang.liu@linux.intel.com>
5  *
6  * This file is licensed under GPLv2.
7  *
8  * This file contains common code to support Message Signaled Interrupts for
9  * PCI compatible and non PCI compatible devices.
10  */
11 #include <linux/device.h>
12 #include <linux/irq.h>
13 #include <linux/irqdomain.h>
14 #include <linux/msi.h>
15 #include <linux/mutex.h>
16 #include <linux/pci.h>
17 #include <linux/slab.h>
18 #include <linux/seq_file.h>
19 #include <linux/sysfs.h>
20 #include <linux/types.h>
21 #include <linux/xarray.h>
22 
23 #include "internals.h"
24 
/**
 * struct msi_device_data - MSI per device data
 * @properties:		MSI properties which are interesting to drivers
 * @mutex:		Mutex protecting the MSI descriptor store
 * @__domains:		Internal data for per device MSI domains, indexed by
 *			domain id. Each entry provides the irqdomain pointer
 *			(->domain) and the xarray holding the MSI descriptors
 *			(->store).
 * @__iter_idx:		Index to search the next entry for iterators. It is
 *			set to MSI_XA_MAX_INDEX when a search finds nothing
 *			and when the descriptor lock is dropped.
 */
struct msi_device_data {
	unsigned long			properties;
	struct mutex			mutex;
	struct msi_dev_domain		__domains[MSI_MAX_DEVICE_IRQDOMAINS];
	unsigned long			__iter_idx;
};
38 
/**
 * struct msi_ctrl - MSI internal management control structure
 * @domid:	ID of the domain on which management operations should be done.
 *		Must be smaller than MSI_MAX_DEVICE_IRQDOMAINS.
 * @first:	First (hardware) slot index to operate on (inclusive)
 * @last:	Last (hardware) slot index to operate on (inclusive). Must not
 *		be smaller than @first and must be within the hardware size of
 *		the domain.
 * @nirqs:	The number of Linux interrupts to allocate. Can be larger
 *		than the range due to PCI/multi-MSI.
 */
struct msi_ctrl {
	unsigned int			domid;
	unsigned int			first;
	unsigned int			last;
	unsigned int			nirqs;
};
53 
/*
 * Invalid Xarray index which is outside of any searchable range. It is used
 * to invalidate the iterator index cached in msi_device_data::__iter_idx.
 */
#define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
/* The maximum domain size, i.e. one slot for each index up to MSI_MAX_INDEX */
#define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)

/* Forward declarations */
static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
static inline int msi_sysfs_create_group(struct device *dev);
62 
63 
64 /**
65  * msi_alloc_desc - Allocate an initialized msi_desc
66  * @dev:	Pointer to the device for which this is allocated
67  * @nvec:	The number of vectors used in this entry
68  * @affinity:	Optional pointer to an affinity mask array size of @nvec
69  *
70  * If @affinity is not %NULL then an affinity array[@nvec] is allocated
71  * and the affinity masks and flags from @affinity are copied.
72  *
73  * Return: pointer to allocated &msi_desc on success or %NULL on failure
74  */
75 static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
76 				       const struct irq_affinity_desc *affinity)
77 {
78 	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
79 
80 	if (!desc)
81 		return NULL;
82 
83 	desc->dev = dev;
84 	desc->nvec_used = nvec;
85 	if (affinity) {
86 		desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL);
87 		if (!desc->affinity) {
88 			kfree(desc);
89 			return NULL;
90 		}
91 	}
92 	return desc;
93 }
94 
95 static void msi_free_desc(struct msi_desc *desc)
96 {
97 	kfree(desc->affinity);
98 	kfree(desc);
99 }
100 
101 static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
102 			   unsigned int domid, unsigned int index)
103 {
104 	struct msi_device_data *md = dev->msi.data;
105 	struct xarray *xa = &md->__domains[domid].store;
106 	unsigned int hwsize;
107 	int ret;
108 
109 	hwsize = msi_domain_get_hwsize(dev, domid);
110 
111 	if (index == MSI_ANY_INDEX) {
112 		struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
113 		unsigned int index;
114 
115 		/* Let the xarray allocate a free index within the limit */
116 		ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
117 		if (ret)
118 			goto fail;
119 
120 		desc->msi_index = index;
121 		return 0;
122 	} else {
123 		if (index >= hwsize) {
124 			ret = -ERANGE;
125 			goto fail;
126 		}
127 
128 		desc->msi_index = index;
129 		ret = xa_insert(xa, index, desc, GFP_KERNEL);
130 		if (ret)
131 			goto fail;
132 		return 0;
133 	}
134 fail:
135 	msi_free_desc(desc);
136 	return ret;
137 }
138 
139 /**
140  * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
141  *				insert it at @init_desc->msi_index
142  *
143  * @dev:	Pointer to the device for which the descriptor is allocated
144  * @domid:	The id of the interrupt domain to which the desriptor is added
145  * @init_desc:	Pointer to an MSI descriptor to initialize the new descriptor
146  *
147  * Return: 0 on success or an appropriate failure code.
148  */
149 int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
150 			       struct msi_desc *init_desc)
151 {
152 	struct msi_desc *desc;
153 
154 	lockdep_assert_held(&dev->msi.data->mutex);
155 
156 	desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
157 	if (!desc)
158 		return -ENOMEM;
159 
160 	/* Copy type specific data to the new descriptor. */
161 	desc->pci = init_desc->pci;
162 
163 	return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
164 }
165 
166 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
167 {
168 	switch (filter) {
169 	case MSI_DESC_ALL:
170 		return true;
171 	case MSI_DESC_NOTASSOCIATED:
172 		return !desc->irq;
173 	case MSI_DESC_ASSOCIATED:
174 		return !!desc->irq;
175 	}
176 	WARN_ON_ONCE(1);
177 	return false;
178 }
179 
180 static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
181 {
182 	unsigned int hwsize;
183 
184 	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
185 			 (dev->msi.domain &&
186 			  !dev->msi.data->__domains[ctrl->domid].domain)))
187 		return false;
188 
189 	hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
190 	if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
191 			 ctrl->first >= hwsize ||
192 			 ctrl->last >= hwsize))
193 		return false;
194 	return true;
195 }
196 
197 static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
198 {
199 	struct msi_desc *desc;
200 	struct xarray *xa;
201 	unsigned long idx;
202 
203 	lockdep_assert_held(&dev->msi.data->mutex);
204 
205 	if (!msi_ctrl_valid(dev, ctrl))
206 		return;
207 
208 	xa = &dev->msi.data->__domains[ctrl->domid].store;
209 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
210 		xa_erase(xa, idx);
211 
212 		/* Leak the descriptor when it is still referenced */
213 		if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
214 			continue;
215 		msi_free_desc(desc);
216 	}
217 }
218 
219 /**
220  * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
221  * @dev:	Device for which to free the descriptors
222  * @domid:	Id of the domain to operate on
223  * @first:	Index to start freeing from (inclusive)
224  * @last:	Last index to be freed (inclusive)
225  */
226 void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
227 				     unsigned int first, unsigned int last)
228 {
229 	struct msi_ctrl ctrl = {
230 		.domid	= domid,
231 		.first	= first,
232 		.last	= last,
233 	};
234 
235 	msi_domain_free_descs(dev, &ctrl);
236 }
237 
238 /**
239  * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
240  * @dev:	Pointer to the device for which the descriptors are allocated
241  * @ctrl:	Allocation control struct
242  *
243  * Return: 0 on success or an appropriate failure code.
244  */
245 static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
246 {
247 	struct msi_desc *desc;
248 	unsigned int idx;
249 	int ret;
250 
251 	lockdep_assert_held(&dev->msi.data->mutex);
252 
253 	if (!msi_ctrl_valid(dev, ctrl))
254 		return -EINVAL;
255 
256 	for (idx = ctrl->first; idx <= ctrl->last; idx++) {
257 		desc = msi_alloc_desc(dev, 1, NULL);
258 		if (!desc)
259 			goto fail_mem;
260 		ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
261 		if (ret)
262 			goto fail;
263 	}
264 	return 0;
265 
266 fail_mem:
267 	ret = -ENOMEM;
268 fail:
269 	msi_domain_free_descs(dev, ctrl);
270 	return ret;
271 }
272 
273 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
274 {
275 	struct msi_desc *entry = irq_get_msi_desc(irq);
276 
277 	*msg = entry->msg;
278 }
279 EXPORT_SYMBOL_GPL(get_cached_msi_msg);
280 
281 static void msi_device_data_release(struct device *dev, void *res)
282 {
283 	struct msi_device_data *md = res;
284 	int i;
285 
286 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
287 		msi_remove_device_irq_domain(dev, i);
288 		WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
289 		xa_destroy(&md->__domains[i].store);
290 	}
291 	dev->msi.data = NULL;
292 }
293 
294 /**
295  * msi_setup_device_data - Setup MSI device data
296  * @dev:	Device for which MSI device data should be set up
297  *
298  * Return: 0 on success, appropriate error code otherwise
299  *
300  * This can be called more than once for @dev. If the MSI device data is
301  * already allocated the call succeeds. The allocated memory is
302  * automatically released when the device is destroyed.
303  */
304 int msi_setup_device_data(struct device *dev)
305 {
306 	struct msi_device_data *md;
307 	int ret, i;
308 
309 	if (dev->msi.data)
310 		return 0;
311 
312 	md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
313 	if (!md)
314 		return -ENOMEM;
315 
316 	ret = msi_sysfs_create_group(dev);
317 	if (ret) {
318 		devres_free(md);
319 		return ret;
320 	}
321 
322 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
323 		xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
324 
325 	/*
326 	 * If @dev::msi::domain is set and is a global MSI domain, copy the
327 	 * pointer into the domain array so all code can operate on domain
328 	 * ids. The NULL pointer check is required to keep the legacy
329 	 * architecture specific PCI/MSI support working.
330 	 */
331 	if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
332 		md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
333 
334 	mutex_init(&md->mutex);
335 	dev->msi.data = md;
336 	devres_add(dev, md);
337 	return 0;
338 }
339 
340 /**
341  * __msi_lock_descs - Lock the MSI descriptor storage of a device
342  * @dev:	Device to operate on
343  *
344  * Internal function for guard(msi_descs_lock). Don't use in code.
345  */
346 void __msi_lock_descs(struct device *dev)
347 {
348 	mutex_lock(&dev->msi.data->mutex);
349 }
350 EXPORT_SYMBOL_GPL(__msi_lock_descs);
351 
352 /**
353  * __msi_unlock_descs - Unlock the MSI descriptor storage of a device
354  * @dev:	Device to operate on
355  *
356  * Internal function for guard(msi_descs_lock). Don't use in code.
357  */
358 void __msi_unlock_descs(struct device *dev)
359 {
360 	/* Invalidate the index which was cached by the iterator */
361 	dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
362 	mutex_unlock(&dev->msi.data->mutex);
363 }
364 EXPORT_SYMBOL_GPL(__msi_unlock_descs);
365 
366 static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
367 				      enum msi_desc_filter filter)
368 {
369 	struct xarray *xa = &md->__domains[domid].store;
370 	struct msi_desc *desc;
371 
372 	xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
373 		if (msi_desc_match(desc, filter))
374 			return desc;
375 	}
376 	md->__iter_idx = MSI_XA_MAX_INDEX;
377 	return NULL;
378 }
379 
380 /**
381  * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
382  * @dev:	Device to operate on
383  * @domid:	The id of the interrupt domain which should be walked.
384  * @filter:	Descriptor state filter
385  *
386  * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
387  * must be invoked before the call.
388  *
389  * Return: Pointer to the first MSI descriptor matching the search
390  *	   criteria, NULL if none found.
391  */
392 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
393 				       enum msi_desc_filter filter)
394 {
395 	struct msi_device_data *md = dev->msi.data;
396 
397 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
398 		return NULL;
399 
400 	lockdep_assert_held(&md->mutex);
401 
402 	md->__iter_idx = 0;
403 	return msi_find_desc(md, domid, filter);
404 }
405 EXPORT_SYMBOL_GPL(msi_domain_first_desc);
406 
407 /**
408  * msi_next_desc - Get the next MSI descriptor of a device
409  * @dev:	Device to operate on
410  * @domid:	The id of the interrupt domain which should be walked.
411  * @filter:	Descriptor state filter
412  *
413  * The first invocation of msi_next_desc() has to be preceeded by a
414  * successful invocation of __msi_first_desc(). Consecutive invocations are
415  * only valid if the previous one was successful. All these operations have
416  * to be done within the same MSI mutex held region.
417  *
418  * Return: Pointer to the next MSI descriptor matching the search
419  *	   criteria, NULL if none found.
420  */
421 struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
422 			       enum msi_desc_filter filter)
423 {
424 	struct msi_device_data *md = dev->msi.data;
425 
426 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
427 		return NULL;
428 
429 	lockdep_assert_held(&md->mutex);
430 
431 	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
432 		return NULL;
433 
434 	md->__iter_idx++;
435 	return msi_find_desc(md, domid, filter);
436 }
437 EXPORT_SYMBOL_GPL(msi_next_desc);
438 
439 /**
440  * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain
441  * @dev:	Device to operate on
442  * @domid:	Domain ID of the interrupt domain associated to the device
443  * @index:	MSI interrupt index to look for (0-based)
444  *
445  * Return: The Linux interrupt number on success (> 0), 0 if not found
446  */
447 unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
448 {
449 	struct msi_desc *desc;
450 	bool pcimsi = false;
451 	struct xarray *xa;
452 
453 	if (!dev->msi.data)
454 		return 0;
455 
456 	if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
457 		return 0;
458 
459 	/* This check is only valid for the PCI default MSI domain */
460 	if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
461 		pcimsi = to_pci_dev(dev)->msi_enabled;
462 
463 	guard(msi_descs_lock)(dev);
464 	xa = &dev->msi.data->__domains[domid].store;
465 	desc = xa_load(xa, pcimsi ? 0 : index);
466 	if (desc && desc->irq) {
467 		/*
468 		 * PCI-MSI has only one descriptor for multiple interrupts.
469 		 * PCI-MSIX and platform MSI use a descriptor per
470 		 * interrupt.
471 		 */
472 		if (!pcimsi)
473 			return desc->irq;
474 		if (index < desc->nvec_used)
475 			return desc->irq + index;
476 	}
477 	return 0;
478 }
479 EXPORT_SYMBOL_GPL(msi_domain_get_virq);
480 
481 #ifdef CONFIG_SYSFS
/*
 * The attribute group starts out empty. The per interrupt files are added
 * to and removed from the group at runtime by msi_sysfs_populate_desc()
 * and msi_sysfs_remove_desc().
 */
static struct attribute *msi_dev_attrs[] = {
	NULL
};

/* The "msi_irqs" attribute group of a device */
static const struct attribute_group msi_irqs_group = {
	.name	= "msi_irqs",
	.attrs	= msi_dev_attrs,
};
490 
491 static inline int msi_sysfs_create_group(struct device *dev)
492 {
493 	return devm_device_add_group(dev, &msi_irqs_group);
494 }
495 
496 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
497 			     char *buf)
498 {
499 	/* MSI vs. MSIX is per device not per interrupt */
500 	bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
501 
502 	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
503 }
504 
505 static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
506 {
507 	struct device_attribute *attrs = desc->sysfs_attrs;
508 	int i;
509 
510 	if (!attrs)
511 		return;
512 
513 	desc->sysfs_attrs = NULL;
514 	for (i = 0; i < desc->nvec_used; i++) {
515 		if (attrs[i].show)
516 			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
517 		kfree(attrs[i].attr.name);
518 	}
519 	kfree(attrs);
520 }
521 
522 static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
523 {
524 	struct device_attribute *attrs;
525 	int ret, i;
526 
527 	attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
528 	if (!attrs)
529 		return -ENOMEM;
530 
531 	desc->sysfs_attrs = attrs;
532 	for (i = 0; i < desc->nvec_used; i++) {
533 		sysfs_attr_init(&attrs[i].attr);
534 		attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
535 		if (!attrs[i].attr.name) {
536 			ret = -ENOMEM;
537 			goto fail;
538 		}
539 
540 		attrs[i].attr.mode = 0444;
541 		attrs[i].show = msi_mode_show;
542 
543 		ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
544 		if (ret) {
545 			attrs[i].show = NULL;
546 			goto fail;
547 		}
548 	}
549 	return 0;
550 
551 fail:
552 	msi_sysfs_remove_desc(dev, desc);
553 	return ret;
554 }
555 
556 #if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
557 /**
558  * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
559  * @dev:	The device (PCI, platform etc) which will get sysfs entries
560  */
561 int msi_device_populate_sysfs(struct device *dev)
562 {
563 	struct msi_desc *desc;
564 	int ret;
565 
566 	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
567 		if (desc->sysfs_attrs)
568 			continue;
569 		ret = msi_sysfs_populate_desc(dev, desc);
570 		if (ret)
571 			return ret;
572 	}
573 	return 0;
574 }
575 
576 /**
577  * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
578  * @dev:		The device (PCI, platform etc) for which to remove
579  *			sysfs entries
580  */
581 void msi_device_destroy_sysfs(struct device *dev)
582 {
583 	struct msi_desc *desc;
584 
585 	msi_for_each_desc(desc, dev, MSI_DESC_ALL)
586 		msi_sysfs_remove_desc(dev, desc);
587 }
588 #endif /* CONFIG_PCI_MSI_ARCH_FALLBACK || CONFIG_PCI_XEN */
589 #else /* CONFIG_SYSFS */
/* Without sysfs support the sysfs helpers are successful NOPs */
static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
593 #endif /* !CONFIG_SYSFS */
594 
595 static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
596 {
597 	struct irq_domain *domain;
598 
599 	lockdep_assert_held(&dev->msi.data->mutex);
600 
601 	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
602 		return NULL;
603 
604 	domain = dev->msi.data->__domains[domid].domain;
605 	if (!domain)
606 		return NULL;
607 
608 	if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
609 		return NULL;
610 
611 	return domain;
612 }
613 
614 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
615 {
616 	struct msi_domain_info *info;
617 	struct irq_domain *domain;
618 
619 	domain = msi_get_device_domain(dev, domid);
620 	if (domain) {
621 		info = domain->host_data;
622 		return info->hwsize;
623 	}
624 	/* No domain, default to MSI_XA_DOMAIN_SIZE */
625 	return MSI_XA_DOMAIN_SIZE;
626 }
627 
628 static inline void irq_chip_write_msi_msg(struct irq_data *data,
629 					  struct msi_msg *msg)
630 {
631 	data->chip->irq_write_msi_msg(data, msg);
632 }
633 
634 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
635 {
636 	struct msi_domain_info *info = domain->host_data;
637 
638 	/*
639 	 * If the MSI provider has messed with the second message and
640 	 * not advertized that it is level-capable, signal the breakage.
641 	 */
642 	WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
643 		  (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
644 		(msg[1].address_lo || msg[1].address_hi || msg[1].data));
645 }
646 
647 /**
648  * msi_domain_set_affinity - Generic affinity setter function for MSI domains
649  * @irq_data:	The irq data associated to the interrupt
650  * @mask:	The affinity mask to set
651  * @force:	Flag to enforce setting (disable online checks)
652  *
653  * Intended to be used by MSI interrupt controllers which are
654  * implemented with hierarchical domains.
655  *
656  * Return: IRQ_SET_MASK_* result code
657  */
658 int msi_domain_set_affinity(struct irq_data *irq_data,
659 			    const struct cpumask *mask, bool force)
660 {
661 	struct irq_data *parent = irq_data->parent_data;
662 	struct msi_msg msg[2] = { [1] = { }, };
663 	int ret;
664 
665 	ret = parent->chip->irq_set_affinity(parent, mask, force);
666 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
667 		BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
668 		msi_check_level(irq_data->domain, msg);
669 		irq_chip_write_msi_msg(irq_data, msg);
670 	}
671 
672 	return ret;
673 }
674 
675 static int msi_domain_activate(struct irq_domain *domain,
676 			       struct irq_data *irq_data, bool early)
677 {
678 	struct msi_msg msg[2] = { [1] = { }, };
679 
680 	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
681 	msi_check_level(irq_data->domain, msg);
682 	irq_chip_write_msi_msg(irq_data, msg);
683 	return 0;
684 }
685 
686 static void msi_domain_deactivate(struct irq_domain *domain,
687 				  struct irq_data *irq_data)
688 {
689 	struct msi_msg msg[2];
690 
691 	memset(msg, 0, sizeof(msg));
692 	irq_chip_write_msi_msg(irq_data, msg);
693 }
694 
695 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
696 			    unsigned int nr_irqs, void *arg)
697 {
698 	struct msi_domain_info *info = domain->host_data;
699 	struct msi_domain_ops *ops = info->ops;
700 	irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
701 	int i, ret;
702 
703 	if (irq_find_mapping(domain, hwirq) > 0)
704 		return -EEXIST;
705 
706 	if (domain->parent) {
707 		ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
708 		if (ret < 0)
709 			return ret;
710 	}
711 
712 	for (i = 0; i < nr_irqs; i++) {
713 		ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
714 		if (ret < 0) {
715 			if (ops->msi_free) {
716 				for (i--; i >= 0; i--)
717 					ops->msi_free(domain, info, virq + i);
718 			}
719 			irq_domain_free_irqs_top(domain, virq, nr_irqs);
720 			return ret;
721 		}
722 	}
723 
724 	return 0;
725 }
726 
727 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
728 			    unsigned int nr_irqs)
729 {
730 	struct msi_domain_info *info = domain->host_data;
731 	int i;
732 
733 	if (info->ops->msi_free) {
734 		for (i = 0; i < nr_irqs; i++)
735 			info->ops->msi_free(domain, info, virq + i);
736 	}
737 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
738 }
739 
740 static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
741 				irq_hw_number_t *hwirq, unsigned int *type)
742 {
743 	struct msi_domain_info *info = domain->host_data;
744 
745 	/*
746 	 * This will catch allocations through the regular irqdomain path except
747 	 * for MSI domains which really support this, e.g. MBIGEN.
748 	 */
749 	if (!info->ops->msi_translate)
750 		return -ENOTSUPP;
751 	return info->ops->msi_translate(domain, fwspec, hwirq, type);
752 }
753 
754 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
755 static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d,
756 				  struct irq_data *irqd, int ind)
757 {
758 	struct msi_desc *desc = irq_data_get_msi_desc(irqd);
759 
760 	if (!desc)
761 		return;
762 
763 	seq_printf(m, "\n%*saddress_hi: 0x%08x", ind + 1, "", desc->msg.address_hi);
764 	seq_printf(m, "\n%*saddress_lo: 0x%08x", ind + 1, "", desc->msg.address_lo);
765 	seq_printf(m, "\n%*smsg_data:   0x%08x\n", ind + 1, "", desc->msg.data);
766 }
767 #endif
768 
/*
 * The irqdomain operations which are installed for MSI domains created by
 * __msi_create_irq_domain(). The debug_show callback is only available
 * with CONFIG_GENERIC_IRQ_DEBUGFS.
 */
static const struct irq_domain_ops msi_domain_ops = {
	.alloc		= msi_domain_alloc,
	.free		= msi_domain_free,
	.activate	= msi_domain_activate,
	.deactivate	= msi_domain_deactivate,
	.translate	= msi_domain_translate,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
	.debug_show     = msi_domain_debug_show,
#endif
};
779 
780 static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
781 						msi_alloc_info_t *arg)
782 {
783 	return arg->hwirq;
784 }
785 
786 static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
787 				  int nvec, msi_alloc_info_t *arg)
788 {
789 	memset(arg, 0, sizeof(*arg));
790 	return 0;
791 }
792 
793 static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
794 				    struct msi_desc *desc)
795 {
796 	arg->desc = desc;
797 }
798 
799 static int msi_domain_ops_init(struct irq_domain *domain,
800 			       struct msi_domain_info *info,
801 			       unsigned int virq, irq_hw_number_t hwirq,
802 			       msi_alloc_info_t *arg)
803 {
804 	irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
805 				      info->chip_data);
806 	if (info->handler && info->handler_name) {
807 		__irq_set_handler(virq, info->handler, 0, info->handler_name);
808 		if (info->handler_data)
809 			irq_set_handler_data(virq, info->handler_data);
810 	}
811 	return 0;
812 }
813 
/*
 * Default MSI domain operations. They are used as a whole when a domain
 * provides no operations and as fallback for single missing callbacks by
 * msi_domain_update_dom_ops().
 */
static struct msi_domain_ops msi_domain_ops_default = {
	.get_hwirq		= msi_domain_ops_get_hwirq,
	.msi_init		= msi_domain_ops_init,
	.msi_prepare		= msi_domain_ops_prepare,
	.set_desc		= msi_domain_ops_set_desc,
};
820 
821 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
822 {
823 	struct msi_domain_ops *ops = info->ops;
824 
825 	if (ops == NULL) {
826 		info->ops = &msi_domain_ops_default;
827 		return;
828 	}
829 
830 	if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
831 		return;
832 
833 	if (ops->get_hwirq == NULL)
834 		ops->get_hwirq = msi_domain_ops_default.get_hwirq;
835 	if (ops->msi_init == NULL)
836 		ops->msi_init = msi_domain_ops_default.msi_init;
837 	if (ops->msi_prepare == NULL)
838 		ops->msi_prepare = msi_domain_ops_default.msi_prepare;
839 	if (ops->set_desc == NULL)
840 		ops->set_desc = msi_domain_ops_default.set_desc;
841 }
842 
843 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
844 {
845 	struct irq_chip *chip = info->chip;
846 
847 	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
848 	if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
849 		chip->irq_set_affinity = msi_domain_set_affinity;
850 }
851 
852 static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
853 						  struct msi_domain_info *info,
854 						  unsigned int flags,
855 						  struct irq_domain *parent)
856 {
857 	struct irq_domain *domain;
858 
859 	if (info->hwsize > MSI_XA_DOMAIN_SIZE)
860 		return NULL;
861 
862 	/*
863 	 * Hardware size 0 is valid for backwards compatibility and for
864 	 * domains which are not backed by a hardware table. Grant the
865 	 * maximum index space.
866 	 */
867 	if (!info->hwsize)
868 		info->hwsize = MSI_XA_DOMAIN_SIZE;
869 
870 	msi_domain_update_dom_ops(info);
871 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
872 		msi_domain_update_chip_ops(info);
873 
874 	domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
875 					     fwnode, &msi_domain_ops, info);
876 
877 	if (domain) {
878 		irq_domain_update_bus_token(domain, info->bus_token);
879 		if (info->flags & MSI_FLAG_PARENT_PM_DEV)
880 			domain->pm_dev = parent->pm_dev;
881 	}
882 
883 	return domain;
884 }
885 
/**
 * msi_create_irq_domain - Create an MSI interrupt domain
 * @fwnode:	Optional fwnode of the interrupt controller
 * @info:	MSI domain info
 * @parent:	Parent irq domain
 *
 * The domain is created without any additional irqdomain flags.
 *
 * Return: pointer to the created &struct irq_domain or %NULL on failure
 */
struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
					 struct msi_domain_info *info,
					 struct irq_domain *parent)
{
	const unsigned int no_flags = 0;

	return __msi_create_irq_domain(fwnode, info, no_flags, parent);
}
900 
901 /**
902  * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
903  *				  in the domain hierarchy
904  * @dev:		The device for which the domain should be created
905  * @domain:		The domain in the hierarchy this op is being called on
906  * @msi_parent_domain:	The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
907  *			be created
908  * @msi_child_info:	The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
909  *			domain to be created
910  *
911  * Return: true on success, false otherwise
912  *
913  * This is the most complex problem of per device MSI domains and the
914  * underlying interrupt domain hierarchy:
915  *
916  * The device domain to be initialized requests the broadest feature set
917  * possible and the underlying domain hierarchy puts restrictions on it.
918  *
919  * That's trivial for a simple parent->child relationship, but it gets
920  * interesting with an intermediate domain: root->parent->child.  The
921  * intermediate 'parent' can expand the capabilities which the 'root'
922  * domain is providing. So that creates a classic hen and egg problem:
923  * Which entity is doing the restrictions/expansions?
924  *
925  * One solution is to let the root domain handle the initialization that's
926  * why there is the @domain and the @msi_parent_domain pointer.
927  */
928 bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
929 				  struct irq_domain *msi_parent_domain,
930 				  struct msi_domain_info *msi_child_info)
931 {
932 	struct irq_domain *parent = domain->parent;
933 
934 	if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
935 			 !parent->msi_parent_ops->init_dev_msi_info))
936 		return false;
937 
938 	return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
939 							 msi_child_info);
940 }
941 
942 /**
943  * msi_create_device_irq_domain - Create a device MSI interrupt domain
944  * @dev:		Pointer to the device
945  * @domid:		Domain id
946  * @template:		MSI domain info bundle used as template
947  * @hwsize:		Maximum number of MSI table entries (0 if unknown or unlimited)
948  * @domain_data:	Optional pointer to domain specific data which is set in
949  *			msi_domain_info::data
950  * @chip_data:		Optional pointer to chip specific data which is set in
951  *			msi_domain_info::chip_data
952  *
953  * Return: True on success, false otherwise
954  *
955  * There is no firmware node required for this interface because the per
956  * device domains are software constructs which are actually closer to the
957  * hardware reality than any firmware can describe them.
958  *
959  * The domain name and the irq chip name for a MSI device domain are
960  * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
961  *
962  * $PREFIX:   Optional prefix provided by the underlying MSI parent domain
963  *	      via msi_parent_ops::prefix. If that pointer is NULL the prefix
964  *	      is empty.
965  * $CHIPNAME: The name of the irq_chip in @template
966  * $DEVNAME:  The name of the device
967  *
968  * This results in understandable chip names and hardware interrupt numbers
969  * in e.g. /proc/interrupts
970  *
971  * PCI-MSI-0000:00:1c.0     0-edge  Parent domain has no prefix
972  * IR-PCI-MSI-0000:00:1c.4  0-edge  Same with interrupt remapping prefix 'IR-'
973  *
974  * IR-PCI-MSIX-0000:3d:00.0 0-edge  Hardware interrupt numbers reflect
975  * IR-PCI-MSIX-0000:3d:00.0 1-edge  the real MSI-X index on that device
976  * IR-PCI-MSIX-0000:3d:00.0 2-edge
977  *
978  * On IMS domains the hardware interrupt number is either a table entry
979  * index or a purely software managed index but it is guaranteed to be
980  * unique.
981  *
 * The domain pointer is stored in @dev::msi::data::__domains[]. All
 * subsequent operations on the domain depend on the domain id.
984  *
985  * The domain is automatically freed when the device is removed via devres
986  * in the context of @dev::msi::data freeing, but it can also be
987  * independently removed via @msi_remove_device_irq_domain().
988  */
989 bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
990 				  const struct msi_domain_template *template,
991 				  unsigned int hwsize, void *domain_data,
992 				  void *chip_data)
993 {
994 	struct irq_domain *domain, *parent = dev->msi.domain;
995 	const struct msi_parent_ops *pops;
996 	struct fwnode_handle *fwnode;
997 
998 	if (!irq_domain_is_msi_parent(parent))
999 		return false;
1000 
1001 	if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
1002 		return false;
1003 
1004 	struct msi_domain_template *bundle __free(kfree) =
1005 		bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL);
1006 	if (!bundle)
1007 		return false;
1008 
1009 	bundle->info.hwsize = hwsize;
1010 	bundle->info.chip = &bundle->chip;
1011 	bundle->info.ops = &bundle->ops;
1012 	bundle->info.data = domain_data;
1013 	bundle->info.chip_data = chip_data;
1014 
1015 	pops = parent->msi_parent_ops;
1016 	snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
1017 		 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
1018 	bundle->chip.name = bundle->name;
1019 
1020 	/*
1021 	 * Using the device firmware node is required for wire to MSI
1022 	 * device domains so that the existing firmware results in a domain
1023 	 * match.
1024 	 * All other device domains like PCI/MSI use the named firmware
1025 	 * node as they are not guaranteed to have a fwnode. They are never
1026 	 * looked up and always handled in the context of the device.
1027 	 */
1028 	struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL;
1029 
1030 	if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE))
1031 		fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name);
1032 	else
1033 		fwnode = dev->fwnode;
1034 
1035 	if (!fwnode)
1036 		return false;
1037 
1038 	if (msi_setup_device_data(dev))
1039 		return false;
1040 
1041 	guard(msi_descs_lock)(dev);
1042 	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
1043 		return false;
1044 
1045 	if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
1046 		return false;
1047 
1048 	domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
1049 	if (!domain)
1050 		return false;
1051 
1052 	/* @bundle and @fwnode_alloced are now in use. Prevent cleanup */
1053 	retain_ptr(bundle);
1054 	retain_ptr(fwnode_alloced);
1055 	domain->dev = dev;
1056 	dev->msi.data->__domains[domid].domain = domain;
1057 	return true;
1058 }
1059 
1060 /**
1061  * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1062  * @dev:	Pointer to the device
1063  * @domid:	Domain id
1064  */
1065 void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1066 {
1067 	struct fwnode_handle *fwnode = NULL;
1068 	struct msi_domain_info *info;
1069 	struct irq_domain *domain;
1070 
1071 	guard(msi_descs_lock)(dev);
1072 	domain = msi_get_device_domain(dev, domid);
1073 	if (!domain || !irq_domain_is_msi_device(domain))
1074 		return;
1075 
1076 	dev->msi.data->__domains[domid].domain = NULL;
1077 	info = domain->host_data;
1078 	if (irq_domain_is_msi_device(domain))
1079 		fwnode = domain->fwnode;
1080 	irq_domain_remove(domain);
1081 	irq_domain_free_fwnode(fwnode);
1082 	kfree(container_of(info, struct msi_domain_template, info));
1083 }
1084 
1085 /**
1086  * msi_match_device_irq_domain - Match a device irq domain against a bus token
1087  * @dev:	Pointer to the device
1088  * @domid:	Domain id
1089  * @bus_token:	Bus token to match against the domain bus token
1090  *
1091  * Return: True if device domain exists and bus tokens match.
1092  */
1093 bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1094 				 enum irq_domain_bus_token bus_token)
1095 {
1096 	struct msi_domain_info *info;
1097 	struct irq_domain *domain;
1098 
1099 	guard(msi_descs_lock)(dev);
1100 	domain = msi_get_device_domain(dev, domid);
1101 	if (domain && irq_domain_is_msi_device(domain)) {
1102 		info = domain->host_data;
1103 		return info->bus_token == bus_token;
1104 	}
1105 	return false;
1106 }
1107 
1108 static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1109 				   int nvec, msi_alloc_info_t *arg)
1110 {
1111 	struct msi_domain_info *info = domain->host_data;
1112 	struct msi_domain_ops *ops = info->ops;
1113 
1114 	return ops->msi_prepare(domain, dev, nvec, arg);
1115 }
1116 
1117 /*
1118  * Carefully check whether the device can use reservation mode. If
1119  * reservation mode is enabled then the early activation will assign a
1120  * dummy vector to the device. If the PCI/MSI device does not support
1121  * masking of the entry then this can result in spurious interrupts when
1122  * the device driver is not absolutely careful. But even then a malfunction
1123  * of the hardware could result in a spurious interrupt on the dummy vector
1124  * and render the device unusable. If the entry can be masked then the core
1125  * logic will prevent the spurious interrupt and reservation mode can be
1126  * used. For now reservation mode is restricted to PCI/MSI.
1127  */
1128 static bool msi_check_reservation_mode(struct irq_domain *domain,
1129 				       struct msi_domain_info *info,
1130 				       struct device *dev)
1131 {
1132 	struct msi_desc *desc;
1133 
1134 	switch(domain->bus_token) {
1135 	case DOMAIN_BUS_PCI_MSI:
1136 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1137 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1138 	case DOMAIN_BUS_VMD_MSI:
1139 		break;
1140 	default:
1141 		return false;
1142 	}
1143 
1144 	if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1145 		return false;
1146 
1147 	if (info->flags & MSI_FLAG_NO_MASK)
1148 		return false;
1149 
1150 	/*
1151 	 * Checking the first MSI descriptor is sufficient. MSIX supports
1152 	 * masking and MSI does so when the can_mask attribute is set.
1153 	 */
1154 	desc = msi_first_desc(dev, MSI_DESC_ALL);
1155 	return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1156 }
1157 
1158 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1159 			       int allocated)
1160 {
1161 	switch(domain->bus_token) {
1162 	case DOMAIN_BUS_PCI_MSI:
1163 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1164 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1165 	case DOMAIN_BUS_VMD_MSI:
1166 		if (IS_ENABLED(CONFIG_PCI_MSI))
1167 			break;
1168 		fallthrough;
1169 	default:
1170 		return -ENOSPC;
1171 	}
1172 
1173 	/* Let a failed PCI multi MSI allocation retry */
1174 	if (desc->nvec_used > 1)
1175 		return 1;
1176 
1177 	/* If there was a successful allocation let the caller know */
1178 	return allocated ? allocated : -ENOSPC;
1179 }
1180 
1181 #define VIRQ_CAN_RESERVE	0x01
1182 #define VIRQ_ACTIVATE		0x02
1183 
1184 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1185 {
1186 	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1187 	int ret;
1188 
1189 	if (!(vflags & VIRQ_CAN_RESERVE)) {
1190 		irqd_clr_can_reserve(irqd);
1191 
1192 		/*
1193 		 * If the interrupt is managed but no CPU is available to
1194 		 * service it, shut it down until better times. Note that
1195 		 * we only do this on the !RESERVE path as x86 (the only
1196 		 * architecture using this flag) deals with this in a
1197 		 * different way by using a catch-all vector.
1198 		 */
1199 		if ((vflags & VIRQ_ACTIVATE) &&
1200 		    irqd_affinity_is_managed(irqd) &&
1201 		    !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1202 					cpu_online_mask)) {
1203 			    irqd_set_managed_shutdown(irqd);
1204 			    return 0;
1205 		    }
1206 	}
1207 
1208 	if (!(vflags & VIRQ_ACTIVATE))
1209 		return 0;
1210 
1211 	ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1212 	if (ret)
1213 		return ret;
1214 	/*
1215 	 * If the interrupt uses reservation mode, clear the activated bit
1216 	 * so request_irq() will assign the final vector.
1217 	 */
1218 	if (vflags & VIRQ_CAN_RESERVE)
1219 		irqd_clr_activated(irqd);
1220 	return 0;
1221 }
1222 
1223 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1224 				   struct msi_ctrl *ctrl)
1225 {
1226 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1227 	struct msi_domain_info *info = domain->host_data;
1228 	struct msi_domain_ops *ops = info->ops;
1229 	unsigned int vflags = 0, allocated = 0;
1230 	msi_alloc_info_t arg = { };
1231 	struct msi_desc *desc;
1232 	unsigned long idx;
1233 	int i, ret, virq;
1234 
1235 	ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg);
1236 	if (ret)
1237 		return ret;
1238 
1239 	/*
1240 	 * This flag is set by the PCI layer as we need to activate
1241 	 * the MSI entries before the PCI layer enables MSI in the
1242 	 * card. Otherwise the card latches a random msi message.
1243 	 */
1244 	if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1245 		vflags |= VIRQ_ACTIVATE;
1246 
1247 	/*
1248 	 * Interrupt can use a reserved vector and will not occupy
1249 	 * a real device vector until the interrupt is requested.
1250 	 */
1251 	if (msi_check_reservation_mode(domain, info, dev))
1252 		vflags |= VIRQ_CAN_RESERVE;
1253 
1254 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1255 		if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
1256 			continue;
1257 
1258 		/* This should return -ECONFUSED... */
1259 		if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1260 			return -EINVAL;
1261 
1262 		if (ops->prepare_desc)
1263 			ops->prepare_desc(domain, &arg, desc);
1264 
1265 		ops->set_desc(&arg, desc);
1266 
1267 		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1268 					       dev_to_node(dev), &arg, false,
1269 					       desc->affinity);
1270 		if (virq < 0)
1271 			return msi_handle_pci_fail(domain, desc, allocated);
1272 
1273 		for (i = 0; i < desc->nvec_used; i++) {
1274 			irq_set_msi_desc_off(virq, i, desc);
1275 			irq_debugfs_copy_devname(virq + i, dev);
1276 			ret = msi_init_virq(domain, virq + i, vflags);
1277 			if (ret)
1278 				return ret;
1279 		}
1280 		if (info->flags & MSI_FLAG_DEV_SYSFS) {
1281 			ret = msi_sysfs_populate_desc(dev, desc);
1282 			if (ret)
1283 				return ret;
1284 		}
1285 		allocated++;
1286 	}
1287 	return 0;
1288 }
1289 
1290 static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1291 					     struct msi_domain_info *info,
1292 					     struct msi_ctrl *ctrl)
1293 {
1294 	if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1295 		return 0;
1296 
1297 	return msi_domain_add_simple_msi_descs(dev, ctrl);
1298 }
1299 
1300 static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1301 {
1302 	struct msi_domain_info *info;
1303 	struct msi_domain_ops *ops;
1304 	struct irq_domain *domain;
1305 	int ret;
1306 
1307 	if (!msi_ctrl_valid(dev, ctrl))
1308 		return -EINVAL;
1309 
1310 	domain = msi_get_device_domain(dev, ctrl->domid);
1311 	if (!domain)
1312 		return -ENODEV;
1313 
1314 	info = domain->host_data;
1315 
1316 	ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1317 	if (ret)
1318 		return ret;
1319 
1320 	ops = info->ops;
1321 	if (ops->domain_alloc_irqs)
1322 		return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1323 
1324 	return __msi_domain_alloc_irqs(dev, domain, ctrl);
1325 }
1326 
/* Allocate as described by @ctrl and free the range again on any failure */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int err = __msi_domain_alloc_locked(dev, ctrl);

	if (!err)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return err;
}
1335 
1336 /**
1337  * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
1338  * @dev:	Pointer to device struct of the device for which the interrupts
1339  *		are allocated
1340  * @domid:	Id of the interrupt domain to operate on
1341  * @first:	First index to allocate (inclusive)
1342  * @last:	Last index to allocate (inclusive)
1343  *
1344  * Return: %0 on success or an error code.
1345  */
1346 int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
1347 				unsigned int first, unsigned int last)
1348 {
1349 	struct msi_ctrl ctrl = {
1350 		.domid	= domid,
1351 		.first	= first,
1352 		.last	= last,
1353 		.nirqs	= last + 1 - first,
1354 	};
1355 
1356 	guard(msi_descs_lock)(dev);
1357 	return msi_domain_alloc_locked(dev, &ctrl);
1358 }
1359 EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
1360 
1361 /**
1362  * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1363  *
1364  * @dev:	Pointer to device struct of the device for which the interrupts
1365  *		are allocated
1366  * @domid:	Id of the interrupt domain to operate on
1367  * @nirqs:	The number of interrupts to allocate
1368  *
1369  * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1370  * for all unassigned ones. That function is to be used for MSI domain usage where
1371  * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1372  *
1373  * Return: %0 on success or an error code.
1374  */
1375 int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1376 {
1377 	struct msi_ctrl ctrl = {
1378 		.domid	= domid,
1379 		.first	= 0,
1380 		.last	= msi_domain_get_hwsize(dev, domid) - 1,
1381 		.nirqs	= nirqs,
1382 	};
1383 
1384 	return msi_domain_alloc_locked(dev, &ctrl);
1385 }
1386 
1387 static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
1388 						unsigned int index,
1389 						const struct irq_affinity_desc *affdesc,
1390 						union msi_instance_cookie *icookie)
1391 {
1392 	struct msi_ctrl ctrl = { .domid	= domid, .nirqs = 1, };
1393 	struct irq_domain *domain;
1394 	struct msi_map map = { };
1395 	struct msi_desc *desc;
1396 	int ret;
1397 
1398 	domain = msi_get_device_domain(dev, domid);
1399 	if (!domain) {
1400 		map.index = -ENODEV;
1401 		return map;
1402 	}
1403 
1404 	desc = msi_alloc_desc(dev, 1, affdesc);
1405 	if (!desc) {
1406 		map.index = -ENOMEM;
1407 		return map;
1408 	}
1409 
1410 	if (icookie)
1411 		desc->data.icookie = *icookie;
1412 
1413 	ret = msi_insert_desc(dev, desc, domid, index);
1414 	if (ret) {
1415 		map.index = ret;
1416 		return map;
1417 	}
1418 
1419 	ctrl.first = ctrl.last = desc->msi_index;
1420 
1421 	ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
1422 	if (ret) {
1423 		map.index = ret;
1424 		msi_domain_free_locked(dev, &ctrl);
1425 	} else {
1426 		map.index = desc->msi_index;
1427 		map.virq = desc->irq;
1428 	}
1429 	return map;
1430 }
1431 
1432 /**
1433  * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1434  *			     a given index - or at the next free index
1435  *
1436  * @dev:	Pointer to device struct of the device for which the interrupts
1437  *		are allocated
1438  * @domid:	Id of the interrupt domain to operate on
1439  * @index:	Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1440  *		uses the next free index.
1441  * @affdesc:	Optional pointer to an interrupt affinity descriptor structure
1442  * @icookie:	Optional pointer to a domain specific per instance cookie. If
1443  *		non-NULL the content of the cookie is stored in msi_desc::data.
1444  *		Must be NULL for MSI-X allocations
1445  *
1446  * This requires a MSI interrupt domain which lets the core code manage the
1447  * MSI descriptors.
1448  *
1449  * Return: struct msi_map
1450  *
1451  *	On success msi_map::index contains the allocated index number and
1452  *	msi_map::virq the corresponding Linux interrupt number
1453  *
1454  *	On failure msi_map::index contains the error code and msi_map::virq
1455  *	is %0.
1456  */
1457 struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1458 				       const struct irq_affinity_desc *affdesc,
1459 				       union msi_instance_cookie *icookie)
1460 {
1461 	guard(msi_descs_lock)(dev);
1462 	return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
1463 }
1464 
1465 /**
1466  * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
1467  * @domain:	The domain to allocate on
1468  * @hwirq:	The hardware interrupt number to allocate for
1469  * @type:	The interrupt type
1470  *
1471  * This weirdness supports wire to MSI controllers like MBIGEN.
1472  *
1473  * @hwirq is the hardware interrupt number which is handed in from
1474  * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
1475  * sized in firmware, the hardware interrupt number cannot be used as MSI
1476  * index. For the underlying irq chip the MSI index is irrelevant and
1477  * all it needs is the hardware interrupt number.
1478  *
1479  * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
1480  * hardware interrupt number is stored along with the type information in
1481  * msi_desc::cookie so the underlying interrupt chip and domain code can
1482  * retrieve it.
1483  *
1484  * Return: The Linux interrupt number (> 0) or an error code
1485  */
1486 int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
1487 				  unsigned int type)
1488 {
1489 	unsigned int domid = MSI_DEFAULT_DOMAIN;
1490 	union msi_instance_cookie icookie = { };
1491 	struct device *dev = domain->dev;
1492 	struct msi_map map = { };
1493 
1494 	if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1495 		return -EINVAL;
1496 
1497 	icookie.value = ((u64)type << 32) | hwirq;
1498 
1499 	guard(msi_descs_lock)(dev);
1500 	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
1501 		map.index = -EINVAL;
1502 	else
1503 		map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
1504 	return map.index >= 0 ? map.virq : map.index;
1505 }
1506 
1507 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1508 				   struct msi_ctrl *ctrl)
1509 {
1510 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1511 	struct msi_domain_info *info = domain->host_data;
1512 	struct irq_data *irqd;
1513 	struct msi_desc *desc;
1514 	unsigned long idx;
1515 	int i;
1516 
1517 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1518 		/* Only handle MSI entries which have an interrupt associated */
1519 		if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1520 			continue;
1521 
1522 		/* Make sure all interrupts are deactivated */
1523 		for (i = 0; i < desc->nvec_used; i++) {
1524 			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
1525 			if (irqd && irqd_is_activated(irqd))
1526 				irq_domain_deactivate_irq(irqd);
1527 		}
1528 
1529 		irq_domain_free_irqs(desc->irq, desc->nvec_used);
1530 		if (info->flags & MSI_FLAG_DEV_SYSFS)
1531 			msi_sysfs_remove_desc(dev, desc);
1532 		desc->irq = 0;
1533 	}
1534 }
1535 
1536 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1537 {
1538 	struct msi_domain_info *info;
1539 	struct msi_domain_ops *ops;
1540 	struct irq_domain *domain;
1541 
1542 	if (!msi_ctrl_valid(dev, ctrl))
1543 		return;
1544 
1545 	domain = msi_get_device_domain(dev, ctrl->domid);
1546 	if (!domain)
1547 		return;
1548 
1549 	info = domain->host_data;
1550 	ops = info->ops;
1551 
1552 	if (ops->domain_free_irqs)
1553 		ops->domain_free_irqs(domain, dev);
1554 	else
1555 		__msi_domain_free_irqs(dev, domain, ctrl);
1556 
1557 	if (ops->msi_post_free)
1558 		ops->msi_post_free(domain, dev);
1559 
1560 	if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1561 		msi_domain_free_descs(dev, ctrl);
1562 }
1563 
1564 /**
1565  * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1566  *				       associated to @dev with msi_lock held
1567  * @dev:	Pointer to device struct of the device for which the interrupts
1568  *		are freed
1569  * @domid:	Id of the interrupt domain to operate on
1570  * @first:	First index to free (inclusive)
1571  * @last:	Last index to free (inclusive)
1572  */
1573 static void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1574 					      unsigned int first, unsigned int last)
1575 {
1576 	struct msi_ctrl ctrl = {
1577 		.domid	= domid,
1578 		.first	= first,
1579 		.last	= last,
1580 	};
1581 	msi_domain_free_locked(dev, &ctrl);
1582 }
1583 
1584 /**
1585  * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
1586  *				associated to @dev
1587  * @dev:	Pointer to device struct of the device for which the interrupts
1588  *		are freed
1589  * @domid:	Id of the interrupt domain to operate on
1590  * @first:	First index to free (inclusive)
1591  * @last:	Last index to free (inclusive)
1592  */
1593 void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
1594 				unsigned int first, unsigned int last)
1595 {
1596 	guard(msi_descs_lock)(dev);
1597 	msi_domain_free_irqs_range_locked(dev, domid, first, last);
1598 }
1599 EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
1600 
/**
 * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
 *				     associated to a device
 * @dev:	Pointer to device struct of the device for which the interrupts
 *		are freed
 * @domid:	The id of the domain to operate on
 *
 * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
 * pair. Use this for MSI irqdomains which implement their own vector
 * allocation.
 */
void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
{
	/* Index of the last slot in the hardware index space of the domain */
	unsigned int last = msi_domain_get_hwsize(dev, domid) - 1;

	msi_domain_free_irqs_range_locked(dev, domid, 0, last);
}
1617 
1618 /**
1619  * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
1620  *			      associated to a device
1621  * @dev:	Pointer to device struct of the device for which the interrupts
1622  *		are freed
1623  * @domid:	The id of the domain to operate on
1624  */
1625 void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
1626 {
1627 	guard(msi_descs_lock)(dev);
1628 	msi_domain_free_irqs_all_locked(dev, domid);
1629 }
1630 
1631 /**
1632  * msi_device_domain_free_wired - Free a wired interrupt in @domain
1633  * @domain:	The domain to free the interrupt on
1634  * @virq:	The Linux interrupt number to free
1635  *
1636  * This is the counterpart of msi_device_domain_alloc_wired() for the
1637  * weird wired to MSI converting domains.
1638  */
1639 void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
1640 {
1641 	struct msi_desc *desc = irq_get_msi_desc(virq);
1642 	struct device *dev = domain->dev;
1643 
1644 	if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1645 		return;
1646 
1647 	guard(msi_descs_lock)(dev);
1648 	if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain))
1649 		return;
1650 	msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
1651 					  desc->msi_index);
1652 }
1653 
1654 /**
1655  * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1656  * @domain:	The interrupt domain to retrieve data from
1657  *
1658  * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1659  */
1660 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1661 {
1662 	return (struct msi_domain_info *)domain->host_data;
1663 }
1664 
1665 /**
1666  * msi_device_has_isolated_msi - True if the device has isolated MSI
1667  * @dev: The device to check
1668  *
1669  * Isolated MSI means that HW modeled by an irq_domain on the path from the
1670  * initiating device to the CPU will validate that the MSI message specifies an
1671  * interrupt number that the device is authorized to trigger. This must block
1672  * devices from triggering interrupts they are not authorized to trigger.
1673  * Currently authorization means the MSI vector is one assigned to the device.
1674  *
1675  * This is interesting for securing VFIO use cases where a rouge MSI (eg created
1676  * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
1677  * impact outside its security domain, eg userspace triggering interrupts on
1678  * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
1679  * triggering interrupts on another VM.
1680  */
1681 bool msi_device_has_isolated_msi(struct device *dev)
1682 {
1683 	struct irq_domain *domain = dev_get_msi_domain(dev);
1684 
1685 	for (; domain; domain = domain->parent)
1686 		if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
1687 			return true;
1688 	return arch_is_isolated_msi();
1689 }
1690 EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
1691