xref: /linux/kernel/irq/msi.c (revision 03f76ddff5b04a808ae16c06418460151e2fdd4b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2014 Intel Corp.
4  * Author: Jiang Liu <jiang.liu@linux.intel.com>
5  *
6  * This file is licensed under GPLv2.
7  *
8  * This file contains common code to support Message Signaled Interrupts for
9  * PCI compatible and non PCI compatible devices.
10  */
11 #include <linux/device.h>
12 #include <linux/irq.h>
13 #include <linux/irqdomain.h>
14 #include <linux/msi.h>
15 #include <linux/mutex.h>
16 #include <linux/pci.h>
17 #include <linux/slab.h>
18 #include <linux/seq_file.h>
19 #include <linux/sysfs.h>
20 #include <linux/types.h>
21 #include <linux/xarray.h>
22 
23 #include "internals.h"
24 
25 /**
26  * struct msi_device_data - MSI per device data
27  * @properties:		MSI properties which are interesting to drivers
28  * @mutex:		Mutex protecting the MSI descriptor store
29  * @__domains:		Internal data for per device MSI domains
30  * @__iter_idx:		Index to search the next entry for iterators
 *
 * Each @__domains slot is addressed by domain id and carries the irqdomain
 * pointer (.domain) and the xarray holding the MSI descriptors (.store) of
 * that domain. @__iter_idx is the cursor used by msi_domain_first_desc()
 * and msi_next_desc(); __msi_unlock_descs() resets it to MSI_XA_MAX_INDEX
 * before @mutex is released.
31  */
32 struct msi_device_data {
33 	unsigned long			properties;
34 	struct mutex			mutex;
35 	struct msi_dev_domain		__domains[MSI_MAX_DEVICE_IRQDOMAINS];
36 	unsigned long			__iter_idx;
37 };
38 
39 /**
40  * struct msi_ctrl - MSI internal management control structure
41  * @domid:	ID of the domain on which management operations should be done
42  * @first:	First (hardware) slot index to operate on
43  * @last:	Last (hardware) slot index to operate on
44  * @nirqs:	The number of Linux interrupts to allocate. Can be larger
45  *		than the range due to PCI/multi-MSI.
 *
 * @first and @last form an inclusive range. msi_ctrl_valid() only accepts
 * a control structure when @domid is below MSI_MAX_DEVICE_IRQDOMAINS and
 * @first <= @last < hardware size of the domain.
46  */
47 struct msi_ctrl {
48 	unsigned int			domid;
49 	unsigned int			first;
50 	unsigned int			last;
51 	unsigned int			nirqs;
52 };
53 
54 /* Invalid Xarray index which is outside of any searchable range */
/* Stored in msi_device_data::__iter_idx to invalidate the descriptor iterator */
55 #define MSI_XA_MAX_INDEX	(ULONG_MAX - 1)
56 /* The maximum domain size */
/* Upper bound for msi_domain_info::hwsize; also used when hwsize is 0 or no domain exists */
57 #define MSI_XA_DOMAIN_SIZE	(MSI_MAX_INDEX + 1)
58 
/*
 * Forward declarations of file local (static) helpers which are referenced
 * before their definition.
 */
59 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl);
60 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid);
61 static inline int msi_sysfs_create_group(struct device *dev);
62 static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
63 				   int nvec, msi_alloc_info_t *arg);
64 
65 /**
66  * msi_alloc_desc - Allocate an initialized msi_desc
67  * @dev:	Pointer to the device for which this is allocated
68  * @nvec:	The number of vectors used in this entry
69  * @affinity:	Optional pointer to an affinity mask array size of @nvec
70  *
71  * If @affinity is not %NULL then an affinity array[@nvec] is allocated
72  * and the affinity masks and flags from @affinity are copied.
73  *
74  * Return: pointer to allocated &msi_desc on success or %NULL on failure
75  */
76 static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
77 				       const struct irq_affinity_desc *affinity)
78 {
79 	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
80 
81 	if (!desc)
82 		return NULL;
83 
84 	desc->dev = dev;
85 	desc->nvec_used = nvec;
86 	if (affinity) {
87 		desc->affinity = kmemdup_array(affinity, nvec, sizeof(*desc->affinity), GFP_KERNEL);
88 		if (!desc->affinity) {
89 			kfree(desc);
90 			return NULL;
91 		}
92 	}
93 	return desc;
94 }
95 
96 static void msi_free_desc(struct msi_desc *desc)
97 {
98 	kfree(desc->affinity);
99 	kfree(desc);
100 }
101 
102 static int msi_insert_desc(struct device *dev, struct msi_desc *desc,
103 			   unsigned int domid, unsigned int index)
104 {
105 	struct msi_device_data *md = dev->msi.data;
106 	struct xarray *xa = &md->__domains[domid].store;
107 	unsigned int hwsize;
108 	int ret;
109 
110 	hwsize = msi_domain_get_hwsize(dev, domid);
111 
112 	if (index == MSI_ANY_INDEX) {
113 		struct xa_limit limit = { .min = 0, .max = hwsize - 1 };
114 		unsigned int index;
115 
116 		/* Let the xarray allocate a free index within the limit */
117 		ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL);
118 		if (ret)
119 			goto fail;
120 
121 		desc->msi_index = index;
122 		return 0;
123 	} else {
124 		if (index >= hwsize) {
125 			ret = -ERANGE;
126 			goto fail;
127 		}
128 
129 		desc->msi_index = index;
130 		ret = xa_insert(xa, index, desc, GFP_KERNEL);
131 		if (ret)
132 			goto fail;
133 		return 0;
134 	}
135 fail:
136 	msi_free_desc(desc);
137 	return ret;
138 }
139 
140 /**
141  * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and
142  *				insert it at @init_desc->msi_index
143  *
144  * @dev:	Pointer to the device for which the descriptor is allocated
145  * @domid:	The id of the interrupt domain to which the descriptor is added
146  * @init_desc:	Pointer to an MSI descriptor to initialize the new descriptor
147  *
148  * Return: 0 on success or an appropriate failure code.
149  */
150 int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid,
151 			       struct msi_desc *init_desc)
152 {
153 	struct msi_desc *desc;
154 
155 	lockdep_assert_held(&dev->msi.data->mutex);
156 
157 	desc = msi_alloc_desc(dev, init_desc->nvec_used, init_desc->affinity);
158 	if (!desc)
159 		return -ENOMEM;
160 
161 	/* Copy type specific data to the new descriptor. */
162 	desc->pci = init_desc->pci;
163 
164 	return msi_insert_desc(dev, desc, domid, init_desc->msi_index);
165 }
166 
167 static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
168 {
169 	switch (filter) {
170 	case MSI_DESC_ALL:
171 		return true;
172 	case MSI_DESC_NOTASSOCIATED:
173 		return !desc->irq;
174 	case MSI_DESC_ASSOCIATED:
175 		return !!desc->irq;
176 	}
177 	WARN_ON_ONCE(1);
178 	return false;
179 }
180 
181 static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
182 {
183 	unsigned int hwsize;
184 
185 	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
186 			 (dev->msi.domain &&
187 			  !dev->msi.data->__domains[ctrl->domid].domain)))
188 		return false;
189 
190 	hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
191 	if (WARN_ON_ONCE(ctrl->first > ctrl->last ||
192 			 ctrl->first >= hwsize ||
193 			 ctrl->last >= hwsize))
194 		return false;
195 	return true;
196 }
197 
198 static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl)
199 {
200 	struct msi_desc *desc;
201 	struct xarray *xa;
202 	unsigned long idx;
203 
204 	lockdep_assert_held(&dev->msi.data->mutex);
205 
206 	if (!msi_ctrl_valid(dev, ctrl))
207 		return;
208 
209 	xa = &dev->msi.data->__domains[ctrl->domid].store;
210 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
211 		xa_erase(xa, idx);
212 
213 		/* Leak the descriptor when it is still referenced */
214 		if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED)))
215 			continue;
216 		msi_free_desc(desc);
217 	}
218 }
219 
220 /**
221  * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain
222  * @dev:	Device for which to free the descriptors
223  * @domid:	Id of the domain to operate on
224  * @first:	Index to start freeing from (inclusive)
225  * @last:	Last index to be freed (inclusive)
226  */
227 void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid,
228 				     unsigned int first, unsigned int last)
229 {
230 	struct msi_ctrl ctrl = {
231 		.domid	= domid,
232 		.first	= first,
233 		.last	= last,
234 	};
235 
236 	msi_domain_free_descs(dev, &ctrl);
237 }
238 
239 /**
240  * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors
241  * @dev:	Pointer to the device for which the descriptors are allocated
242  * @ctrl:	Allocation control struct
243  *
244  * Return: 0 on success or an appropriate failure code.
245  */
246 static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl)
247 {
248 	struct msi_desc *desc;
249 	unsigned int idx;
250 	int ret;
251 
252 	lockdep_assert_held(&dev->msi.data->mutex);
253 
254 	if (!msi_ctrl_valid(dev, ctrl))
255 		return -EINVAL;
256 
257 	for (idx = ctrl->first; idx <= ctrl->last; idx++) {
258 		desc = msi_alloc_desc(dev, 1, NULL);
259 		if (!desc)
260 			goto fail_mem;
261 		ret = msi_insert_desc(dev, desc, ctrl->domid, idx);
262 		if (ret)
263 			goto fail;
264 	}
265 	return 0;
266 
267 fail_mem:
268 	ret = -ENOMEM;
269 fail:
270 	msi_domain_free_descs(dev, ctrl);
271 	return ret;
272 }
273 
/* Copy the MSI message cached in the descriptor @entry to @msg. */
274 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
275 {
276 	*msg = entry->msg;
277 }
278 
/*
 * Copy the cached MSI message of the descriptor associated with Linux
 * interrupt @irq to @msg.
 *
 * NOTE(review): the result of irq_get_msi_desc() is dereferenced without a
 * NULL check - confirm that callers only pass interrupts which have an MSI
 * descriptor.
 */
279 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
280 {
281 	struct msi_desc *entry = irq_get_msi_desc(irq);
282 
283 	__get_cached_msi_msg(entry, msg);
284 }
285 EXPORT_SYMBOL_GPL(get_cached_msi_msg);
286 
287 static void msi_device_data_release(struct device *dev, void *res)
288 {
289 	struct msi_device_data *md = res;
290 	int i;
291 
292 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) {
293 		msi_remove_device_irq_domain(dev, i);
294 		WARN_ON_ONCE(!xa_empty(&md->__domains[i].store));
295 		xa_destroy(&md->__domains[i].store);
296 	}
297 	dev->msi.data = NULL;
298 }
299 
300 /**
301  * msi_setup_device_data - Setup MSI device data
302  * @dev:	Device for which MSI device data should be set up
303  *
304  * Return: 0 on success, appropriate error code otherwise
305  *
306  * This can be called more than once for @dev. If the MSI device data is
307  * already allocated the call succeeds. The allocated memory is
308  * automatically released when the device is destroyed.
309  */
310 int msi_setup_device_data(struct device *dev)
311 {
312 	struct msi_device_data *md;
313 	int ret, i;
314 
315 	if (dev->msi.data)
316 		return 0;
317 
318 	md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
319 	if (!md)
320 		return -ENOMEM;
321 
322 	ret = msi_sysfs_create_group(dev);
323 	if (ret) {
324 		devres_free(md);
325 		return ret;
326 	}
327 
328 	for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++)
329 		xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC);
330 
331 	/*
332 	 * If @dev::msi::domain is set and is a global MSI domain, copy the
333 	 * pointer into the domain array so all code can operate on domain
334 	 * ids. The NULL pointer check is required to keep the legacy
335 	 * architecture specific PCI/MSI support working.
336 	 */
337 	if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain))
338 		md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain;
339 
340 	mutex_init(&md->mutex);
341 	dev->msi.data = md;
342 	devres_add(dev, md);
343 	return 0;
344 }
345 
346 /**
347  * __msi_lock_descs - Lock the MSI descriptor storage of a device
348  * @dev:	Device to operate on
349  *
350  * Internal function for guard(msi_descs_lock). Don't use in code.
351  */
352 void __msi_lock_descs(struct device *dev)
353 {
354 	mutex_lock(&dev->msi.data->mutex);
355 }
356 EXPORT_SYMBOL_GPL(__msi_lock_descs);
357 
358 /**
359  * __msi_unlock_descs - Unlock the MSI descriptor storage of a device
360  * @dev:	Device to operate on
361  *
362  * Internal function for guard(msi_descs_lock). Don't use in code.
363  */
364 void __msi_unlock_descs(struct device *dev)
365 {
366 	/* Invalidate the index which was cached by the iterator */
367 	dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX;
368 	mutex_unlock(&dev->msi.data->mutex);
369 }
370 EXPORT_SYMBOL_GPL(__msi_unlock_descs);
371 
372 static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid,
373 				      enum msi_desc_filter filter)
374 {
375 	struct xarray *xa = &md->__domains[domid].store;
376 	struct msi_desc *desc;
377 
378 	xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) {
379 		if (msi_desc_match(desc, filter))
380 			return desc;
381 	}
382 	md->__iter_idx = MSI_XA_MAX_INDEX;
383 	return NULL;
384 }
385 
386 /**
387  * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device
388  * @dev:	Device to operate on
389  * @domid:	The id of the interrupt domain which should be walked.
390  * @filter:	Descriptor state filter
391  *
392  * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs()
393  * must be invoked before the call.
394  *
395  * Return: Pointer to the first MSI descriptor matching the search
396  *	   criteria, NULL if none found.
397  */
398 struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid,
399 				       enum msi_desc_filter filter)
400 {
401 	struct msi_device_data *md = dev->msi.data;
402 
403 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
404 		return NULL;
405 
406 	lockdep_assert_held(&md->mutex);
407 
408 	md->__iter_idx = 0;
409 	return msi_find_desc(md, domid, filter);
410 }
411 EXPORT_SYMBOL_GPL(msi_domain_first_desc);
412 
413 /**
414  * msi_next_desc - Get the next MSI descriptor of a device
415  * @dev:	Device to operate on
416  * @domid:	The id of the interrupt domain which should be walked.
417  * @filter:	Descriptor state filter
418  *
419  * The first invocation of msi_next_desc() has to be preceded by a
420  * successful invocation of msi_domain_first_desc(). Consecutive invocations are
421  * only valid if the previous one was successful. All these operations have
422  * to be done within the same MSI mutex held region.
423  *
424  * Return: Pointer to the next MSI descriptor matching the search
425  *	   criteria, NULL if none found.
426  */
427 struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid,
428 			       enum msi_desc_filter filter)
429 {
430 	struct msi_device_data *md = dev->msi.data;
431 
432 	if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
433 		return NULL;
434 
435 	lockdep_assert_held(&md->mutex);
436 
437 	if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
438 		return NULL;
439 
440 	md->__iter_idx++;
441 	return msi_find_desc(md, domid, filter);
442 }
443 EXPORT_SYMBOL_GPL(msi_next_desc);
444 
445 /**
446  * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on an interrupt domain
447  * @dev:	Device to operate on
448  * @domid:	Domain ID of the interrupt domain associated to the device
449  * @index:	MSI interrupt index to look for (0-based)
450  *
451  * Return: The Linux interrupt number on success (> 0), 0 if not found
452  */
453 unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index)
454 {
455 	struct msi_desc *desc;
456 	bool pcimsi = false;
457 	struct xarray *xa;
458 
459 	if (!dev->msi.data)
460 		return 0;
461 
462 	if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS))
463 		return 0;
464 
465 	/* This check is only valid for the PCI default MSI domain */
466 	if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN)
467 		pcimsi = to_pci_dev(dev)->msi_enabled;
468 
469 	guard(msi_descs_lock)(dev);
470 	xa = &dev->msi.data->__domains[domid].store;
471 	desc = xa_load(xa, pcimsi ? 0 : index);
472 	if (desc && desc->irq) {
473 		/*
474 		 * PCI-MSI has only one descriptor for multiple interrupts.
475 		 * PCI-MSIX and platform MSI use a descriptor per
476 		 * interrupt.
477 		 */
478 		if (!pcimsi)
479 			return desc->irq;
480 		if (index < desc->nvec_used)
481 			return desc->irq + index;
482 	}
483 	return 0;
484 }
485 EXPORT_SYMBOL_GPL(msi_domain_get_virq);
486 
487 #ifdef CONFIG_SYSFS
/*
 * The "msi_irqs" group starts out without attributes. The per interrupt
 * files are added to and removed from it at runtime by
 * msi_sysfs_populate_desc() and msi_sysfs_remove_desc().
 */
488 static struct attribute *msi_dev_attrs[] = {
489 	NULL
490 };
491 
492 static const struct attribute_group msi_irqs_group = {
493 	.name	= "msi_irqs",
494 	.attrs	= msi_dev_attrs,
495 };
496 
/* Add the "msi_irqs" attribute group to @dev through devm_device_add_group(). */
497 static inline int msi_sysfs_create_group(struct device *dev)
498 {
499 	return devm_device_add_group(dev, &msi_irqs_group);
500 }
501 
502 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
503 			     char *buf)
504 {
505 	/* MSI vs. MSIX is per device not per interrupt */
506 	bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
507 
508 	return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
509 }
510 
511 static void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc)
512 {
513 	struct device_attribute *attrs = desc->sysfs_attrs;
514 	int i;
515 
516 	if (!attrs)
517 		return;
518 
519 	desc->sysfs_attrs = NULL;
520 	for (i = 0; i < desc->nvec_used; i++) {
521 		if (attrs[i].show)
522 			sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
523 		kfree(attrs[i].attr.name);
524 	}
525 	kfree(attrs);
526 }
527 
528 static int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc)
529 {
530 	struct device_attribute *attrs;
531 	int ret, i;
532 
533 	attrs = kcalloc(desc->nvec_used, sizeof(*attrs), GFP_KERNEL);
534 	if (!attrs)
535 		return -ENOMEM;
536 
537 	desc->sysfs_attrs = attrs;
538 	for (i = 0; i < desc->nvec_used; i++) {
539 		sysfs_attr_init(&attrs[i].attr);
540 		attrs[i].attr.name = kasprintf(GFP_KERNEL, "%d", desc->irq + i);
541 		if (!attrs[i].attr.name) {
542 			ret = -ENOMEM;
543 			goto fail;
544 		}
545 
546 		attrs[i].attr.mode = 0444;
547 		attrs[i].show = msi_mode_show;
548 
549 		ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr, msi_irqs_group.name);
550 		if (ret) {
551 			attrs[i].show = NULL;
552 			goto fail;
553 		}
554 	}
555 	return 0;
556 
557 fail:
558 	msi_sysfs_remove_desc(dev, desc);
559 	return ret;
560 }
561 
562 #if defined(CONFIG_PCI_MSI_ARCH_FALLBACKS) || defined(CONFIG_PCI_XEN)
563 /**
564  * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
565  * @dev:	The device (PCI, platform etc) which will get sysfs entries
566  */
567 int msi_device_populate_sysfs(struct device *dev)
568 {
569 	struct msi_desc *desc;
570 	int ret;
571 
572 	msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
573 		if (desc->sysfs_attrs)
574 			continue;
575 		ret = msi_sysfs_populate_desc(dev, desc);
576 		if (ret)
577 			return ret;
578 	}
579 	return 0;
580 }
581 
582 /**
583  * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
584  * @dev:		The device (PCI, platform etc) for which to remove
585  *			sysfs entries
586  */
587 void msi_device_destroy_sysfs(struct device *dev)
588 {
589 	struct msi_desc *desc;
590 
	/* msi_sysfs_remove_desc() ignores descriptors without sysfs entries */
591 	msi_for_each_desc(desc, dev, MSI_DESC_ALL)
592 		msi_sysfs_remove_desc(dev, desc);
593 }
594 #endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS || CONFIG_PCI_XEN */
595 #else /* CONFIG_SYSFS */
/* !CONFIG_SYSFS stubs: the int variants report success, the void variant does nothing */
596 static inline int msi_sysfs_create_group(struct device *dev) { return 0; }
597 static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *desc) { return 0; }
598 static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { }
599 #endif /* !CONFIG_SYSFS */
600 
601 static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid)
602 {
603 	struct irq_domain *domain;
604 
605 	lockdep_assert_held(&dev->msi.data->mutex);
606 
607 	if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS))
608 		return NULL;
609 
610 	domain = dev->msi.data->__domains[domid].domain;
611 	if (!domain)
612 		return NULL;
613 
614 	if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain)))
615 		return NULL;
616 
617 	return domain;
618 }
619 
620 static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid)
621 {
622 	struct msi_domain_info *info;
623 	struct irq_domain *domain;
624 
625 	domain = msi_get_device_domain(dev, domid);
626 	if (domain) {
627 		info = domain->host_data;
628 		return info->hwsize;
629 	}
630 	/* No domain, default to MSI_XA_DOMAIN_SIZE */
631 	return MSI_XA_DOMAIN_SIZE;
632 }
633 
/*
 * Hand @msg to the irq_write_msi_msg() callback of the interrupt chip of
 * @data. The callback pointer is not checked for NULL here.
 */
634 static inline void irq_chip_write_msi_msg(struct irq_data *data,
635 					  struct msi_msg *msg)
636 {
637 	data->chip->irq_write_msi_msg(data, msg);
638 }
639 
640 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
641 {
642 	struct msi_domain_info *info = domain->host_data;
643 
644 	/*
645 	 * If the MSI provider has messed with the second message and
646 	 * not advertized that it is level-capable, signal the breakage.
647 	 */
648 	WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
649 		  (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
650 		(msg[1].address_lo || msg[1].address_hi || msg[1].data));
651 }
652 
653 /**
654  * msi_domain_set_affinity - Generic affinity setter function for MSI domains
655  * @irq_data:	The irq data associated to the interrupt
656  * @mask:	The affinity mask to set
657  * @force:	Flag to enforce setting (disable online checks)
658  *
659  * Intended to be used by MSI interrupt controllers which are
660  * implemented with hierarchical domains.
661  *
662  * Return: IRQ_SET_MASK_* result code
663  */
664 int msi_domain_set_affinity(struct irq_data *irq_data,
665 			    const struct cpumask *mask, bool force)
666 {
667 	struct irq_data *parent = irq_data->parent_data;
668 	struct msi_msg msg[2] = { [1] = { }, };
669 	int ret;
670 
671 	ret = parent->chip->irq_set_affinity(parent, mask, force);
672 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
673 		BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
674 		msi_check_level(irq_data->domain, msg);
675 		irq_chip_write_msi_msg(irq_data, msg);
676 	}
677 
678 	return ret;
679 }
680 
681 static int msi_domain_activate(struct irq_domain *domain,
682 			       struct irq_data *irq_data, bool early)
683 {
684 	struct msi_msg msg[2] = { [1] = { }, };
685 
686 	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
687 	msi_check_level(irq_data->domain, msg);
688 	irq_chip_write_msi_msg(irq_data, msg);
689 	return 0;
690 }
691 
692 static void msi_domain_deactivate(struct irq_domain *domain,
693 				  struct irq_data *irq_data)
694 {
695 	struct msi_msg msg[2];
696 
697 	memset(msg, 0, sizeof(msg));
698 	irq_chip_write_msi_msg(irq_data, msg);
699 }
700 
701 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
702 			    unsigned int nr_irqs, void *arg)
703 {
704 	struct msi_domain_info *info = domain->host_data;
705 	struct msi_domain_ops *ops = info->ops;
706 	irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
707 	int i, ret;
708 
709 	if (irq_find_mapping(domain, hwirq) > 0)
710 		return -EEXIST;
711 
712 	if (domain->parent) {
713 		ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
714 		if (ret < 0)
715 			return ret;
716 	}
717 
718 	for (i = 0; i < nr_irqs; i++) {
719 		ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
720 		if (ret < 0) {
721 			if (ops->msi_free) {
722 				for (i--; i >= 0; i--)
723 					ops->msi_free(domain, info, virq + i);
724 			}
725 			irq_domain_free_irqs_top(domain, virq, nr_irqs);
726 			return ret;
727 		}
728 	}
729 
730 	return 0;
731 }
732 
733 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
734 			    unsigned int nr_irqs)
735 {
736 	struct msi_domain_info *info = domain->host_data;
737 	int i;
738 
739 	if (info->ops->msi_free) {
740 		for (i = 0; i < nr_irqs; i++)
741 			info->ops->msi_free(domain, info, virq + i);
742 	}
743 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
744 }
745 
746 static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fwspec,
747 				irq_hw_number_t *hwirq, unsigned int *type)
748 {
749 	struct msi_domain_info *info = domain->host_data;
750 
751 	/*
752 	 * This will catch allocations through the regular irqdomain path except
753 	 * for MSI domains which really support this, e.g. MBIGEN.
754 	 */
755 	if (!info->ops->msi_translate)
756 		return -ENOTSUPP;
757 	return info->ops->msi_translate(domain, fwspec, hwirq, type);
758 }
759 
760 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
761 static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d,
762 				  struct irq_data *irqd, int ind)
763 {
764 	struct msi_desc *desc = irqd ? irq_data_get_msi_desc(irqd) : NULL;
765 
766 	if (!desc)
767 		return;
768 
769 	seq_printf(m, "\n%*saddress_hi: 0x%08x", ind + 1, "", desc->msg.address_hi);
770 	seq_printf(m, "\n%*saddress_lo: 0x%08x", ind + 1, "", desc->msg.address_lo);
771 	seq_printf(m, "\n%*smsg_data:   0x%08x\n", ind + 1, "", desc->msg.data);
772 }
773 #endif
774 
/* irqdomain callbacks handed to irq_domain_create_hierarchy() by __msi_create_irq_domain() */
775 static const struct irq_domain_ops msi_domain_ops = {
776 	.alloc		= msi_domain_alloc,
777 	.free		= msi_domain_free,
778 	.activate	= msi_domain_activate,
779 	.deactivate	= msi_domain_deactivate,
780 	.translate	= msi_domain_translate,
781 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
782 	.debug_show     = msi_domain_debug_show,
783 #endif
784 };
785 
/* Default get_hwirq() callback: return the hardware interrupt number stored in @arg */
786 static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
787 						msi_alloc_info_t *arg)
788 {
789 	return arg->hwirq;
790 }
791 
/* Default msi_prepare() callback: zero the allocation info @arg and report success */
792 static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
793 				  int nvec, msi_alloc_info_t *arg)
794 {
795 	memset(arg, 0, sizeof(*arg));
796 	return 0;
797 }
798 
/* Default msi_teardown() callback: intentionally empty */
799 static void msi_domain_ops_teardown(struct irq_domain *domain, msi_alloc_info_t *arg)
800 {
801 }
802 
/* Default set_desc() callback: record @desc in the allocation info @arg */
803 static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
804 				    struct msi_desc *desc)
805 {
806 	arg->desc = desc;
807 }
808 
809 static int msi_domain_ops_init(struct irq_domain *domain,
810 			       struct msi_domain_info *info,
811 			       unsigned int virq, irq_hw_number_t hwirq,
812 			       msi_alloc_info_t *arg)
813 {
814 	irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
815 				      info->chip_data);
816 	if (info->handler && info->handler_name) {
817 		__irq_set_handler(virq, info->handler, 0, info->handler_name);
818 		if (info->handler_data)
819 			irq_set_handler_data(virq, info->handler_data);
820 	}
821 	return 0;
822 }
823 
/*
 * Default MSI domain callbacks. msi_domain_update_dom_ops() installs the
 * whole set when a domain info has no ops and uses the individual entries
 * as fallbacks when MSI_FLAG_USE_DEF_DOM_OPS is set.
 */
824 static struct msi_domain_ops msi_domain_ops_default = {
825 	.get_hwirq		= msi_domain_ops_get_hwirq,
826 	.msi_init		= msi_domain_ops_init,
827 	.msi_prepare		= msi_domain_ops_prepare,
828 	.msi_teardown		= msi_domain_ops_teardown,
829 	.set_desc		= msi_domain_ops_set_desc,
830 };
831 
832 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
833 {
834 	struct msi_domain_ops *ops = info->ops;
835 
836 	if (ops == NULL) {
837 		info->ops = &msi_domain_ops_default;
838 		return;
839 	}
840 
841 	if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
842 		return;
843 
844 	if (ops->get_hwirq == NULL)
845 		ops->get_hwirq = msi_domain_ops_default.get_hwirq;
846 	if (ops->msi_init == NULL)
847 		ops->msi_init = msi_domain_ops_default.msi_init;
848 	if (ops->msi_prepare == NULL)
849 		ops->msi_prepare = msi_domain_ops_default.msi_prepare;
850 	if (ops->msi_teardown == NULL)
851 		ops->msi_teardown = msi_domain_ops_default.msi_teardown;
852 	if (ops->set_desc == NULL)
853 		ops->set_desc = msi_domain_ops_default.set_desc;
854 }
855 
856 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
857 {
858 	struct irq_chip *chip = info->chip;
859 
860 	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
861 	if (!chip->irq_set_affinity && !(info->flags & MSI_FLAG_NO_AFFINITY))
862 		chip->irq_set_affinity = msi_domain_set_affinity;
863 }
864 
865 static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode,
866 						  struct msi_domain_info *info,
867 						  unsigned int flags,
868 						  struct irq_domain *parent)
869 {
870 	struct irq_domain *domain;
871 
872 	if (info->hwsize > MSI_XA_DOMAIN_SIZE)
873 		return NULL;
874 
875 	/*
876 	 * Hardware size 0 is valid for backwards compatibility and for
877 	 * domains which are not backed by a hardware table. Grant the
878 	 * maximum index space.
879 	 */
880 	if (!info->hwsize)
881 		info->hwsize = MSI_XA_DOMAIN_SIZE;
882 
883 	msi_domain_update_dom_ops(info);
884 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
885 		msi_domain_update_chip_ops(info);
886 
887 	domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0,
888 					     fwnode, &msi_domain_ops, info);
889 
890 	if (domain) {
891 		irq_domain_update_bus_token(domain, info->bus_token);
892 		domain->dev = info->dev;
893 		if (info->flags & MSI_FLAG_PARENT_PM_DEV)
894 			domain->pm_dev = parent->pm_dev;
895 	}
896 
897 	return domain;
898 }
899 
900 /**
901  * msi_create_irq_domain - Create an MSI interrupt domain
902  * @fwnode:	Optional fwnode of the interrupt controller
903  * @info:	MSI domain info
904  * @parent:	Parent irq domain
905  *
906  * Return: pointer to the created &struct irq_domain or %NULL on failure
907  */
908 struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
909 					 struct msi_domain_info *info,
910 					 struct irq_domain *parent)
911 {
	/* No extra irqdomain flags; the helper adds IRQ_DOMAIN_FLAG_MSI itself */
912 	return __msi_create_irq_domain(fwnode, info, 0, parent);
913 }
914 
915 /**
916  * msi_create_parent_irq_domain - Create an MSI-parent interrupt domain
917  * @info:		MSI irqdomain creation info
918  * @msi_parent_ops:	MSI parent callbacks and configuration
919  *
920  * Return: pointer to the created &struct irq_domain or %NULL on failure
921  */
922 struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info,
923 						const struct msi_parent_ops *msi_parent_ops)
924 {
925 	struct irq_domain *d;
926 
927 	info->hwirq_max		= max(info->hwirq_max, info->size);
928 	info->size		= info->hwirq_max;
929 	info->domain_flags	|= IRQ_DOMAIN_FLAG_MSI_PARENT;
930 	info->bus_token		= msi_parent_ops->bus_select_token;
931 
932 	d = irq_domain_instantiate(info);
933 	if (IS_ERR(d))
934 		return NULL;
935 
936 	d->msi_parent_ops = msi_parent_ops;
937 	return d;
938 }
939 EXPORT_SYMBOL_GPL(msi_create_parent_irq_domain);
940 
941 /**
942  * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down
943  *				  in the domain hierarchy
944  * @dev:		The device for which the domain should be created
945  * @domain:		The domain in the hierarchy this op is being called on
946  * @msi_parent_domain:	The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to
947  *			be created
948  * @msi_child_info:	The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE
949  *			domain to be created
950  *
951  * Return: true on success, false otherwise
952  *
953  * This is the most complex problem of per device MSI domains and the
954  * underlying interrupt domain hierarchy:
955  *
956  * The device domain to be initialized requests the broadest feature set
957  * possible and the underlying domain hierarchy puts restrictions on it.
958  *
959  * That's trivial for a simple parent->child relationship, but it gets
960  * interesting with an intermediate domain: root->parent->child.  The
961  * intermediate 'parent' can expand the capabilities which the 'root'
962  * domain is providing. So that creates a classic hen and egg problem:
963  * Which entity is doing the restrictions/expansions?
964  *
965  * One solution is to let the root domain handle the initialization that's
966  * why there is the @domain and the @msi_parent_domain pointer.
967  */
968 bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
969 				  struct irq_domain *msi_parent_domain,
970 				  struct msi_domain_info *msi_child_info)
971 {
972 	struct irq_domain *parent = domain->parent;
973 
974 	if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops ||
975 			 !parent->msi_parent_ops->init_dev_msi_info))
976 		return false;
977 
978 	return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain,
979 							 msi_child_info);
980 }
981 
982 /**
983  * msi_create_device_irq_domain - Create a device MSI interrupt domain
984  * @dev:		Pointer to the device
985  * @domid:		Domain id
986  * @template:		MSI domain info bundle used as template
987  * @hwsize:		Maximum number of MSI table entries (0 if unknown or unlimited)
988  * @domain_data:	Optional pointer to domain specific data which is set in
989  *			msi_domain_info::data
990  * @chip_data:		Optional pointer to chip specific data which is set in
991  *			msi_domain_info::chip_data
992  *
993  * Return: True on success, false otherwise
994  *
995  * There is no firmware node required for this interface because the per
996  * device domains are software constructs which are actually closer to the
997  * hardware reality than any firmware can describe them.
998  *
999  * The domain name and the irq chip name for a MSI device domain are
1000  * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)"
1001  *
1002  * $PREFIX:   Optional prefix provided by the underlying MSI parent domain
1003  *	      via msi_parent_ops::prefix. If that pointer is NULL the prefix
1004  *	      is empty.
1005  * $CHIPNAME: The name of the irq_chip in @template
1006  * $DEVNAME:  The name of the device
1007  *
1008  * This results in understandable chip names and hardware interrupt numbers
1009  * in e.g. /proc/interrupts
1010  *
1011  * PCI-MSI-0000:00:1c.0     0-edge  Parent domain has no prefix
1012  * IR-PCI-MSI-0000:00:1c.4  0-edge  Same with interrupt remapping prefix 'IR-'
1013  *
1014  * IR-PCI-MSIX-0000:3d:00.0 0-edge  Hardware interrupt numbers reflect
1015  * IR-PCI-MSIX-0000:3d:00.0 1-edge  the real MSI-X index on that device
1016  * IR-PCI-MSIX-0000:3d:00.0 2-edge
1017  *
1018  * On IMS domains the hardware interrupt number is either a table entry
1019  * index or a purely software managed index but it is guaranteed to be
1020  * unique.
1021  *
1022  * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All
1023  * subsequent operations on the domain depend on the domain id.
1024  *
1025  * The domain is automatically freed when the device is removed via devres
1026  * in the context of @dev::msi::data freeing, but it can also be
1027  * independently removed via @msi_remove_device_irq_domain().
1028  */
1029 bool msi_create_device_irq_domain(struct device *dev, unsigned int domid,
1030 				  const struct msi_domain_template *template,
1031 				  unsigned int hwsize, void *domain_data,
1032 				  void *chip_data)
1033 {
1034 	struct irq_domain *domain, *parent = dev->msi.domain;
1035 	const struct msi_parent_ops *pops;
1036 	struct fwnode_handle *fwnode;
1037 
1038 	if (!irq_domain_is_msi_parent(parent))
1039 		return false;
1040 
1041 	if (domid >= MSI_MAX_DEVICE_IRQDOMAINS)
1042 		return false;
1043 
1044 	struct msi_domain_template *bundle __free(kfree) =
1045 		kmemdup(template, sizeof(*bundle), GFP_KERNEL);
1046 	if (!bundle)
1047 		return false;
1048 
1049 	bundle->info.hwsize = hwsize;
1050 	bundle->info.chip = &bundle->chip;
1051 	bundle->info.ops = &bundle->ops;
1052 	bundle->info.data = domain_data;
1053 	bundle->info.chip_data = chip_data;
1054 	bundle->info.alloc_data = &bundle->alloc_info;
1055 	bundle->info.dev = dev;
1056 
1057 	pops = parent->msi_parent_ops;
1058 	snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s",
1059 		 pops->prefix ? : "", bundle->chip.name, dev_name(dev));
1060 	bundle->chip.name = bundle->name;
1061 
1062 	/*
1063 	 * Using the device firmware node is required for wire to MSI
1064 	 * device domains so that the existing firmware results in a domain
1065 	 * match.
1066 	 * All other device domains like PCI/MSI use the named firmware
1067 	 * node as they are not guaranteed to have a fwnode. They are never
1068 	 * looked up and always handled in the context of the device.
1069 	 */
1070 	struct fwnode_handle *fwnode_alloced __free(irq_domain_free_fwnode) = NULL;
1071 
1072 	if (!(bundle->info.flags & MSI_FLAG_USE_DEV_FWNODE))
1073 		fwnode = fwnode_alloced = irq_domain_alloc_named_fwnode(bundle->name);
1074 	else
1075 		fwnode = dev->fwnode;
1076 
1077 	if (!fwnode)
1078 		return false;
1079 
1080 	if (msi_setup_device_data(dev))
1081 		return false;
1082 
1083 	guard(msi_descs_lock)(dev);
1084 	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid)))
1085 		return false;
1086 
1087 	if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info))
1088 		return false;
1089 
1090 	domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent);
1091 	if (!domain)
1092 		return false;
1093 
1094 	dev->msi.data->__domains[domid].domain = domain;
1095 
1096 	if (msi_domain_prepare_irqs(domain, dev, hwsize, &bundle->alloc_info)) {
1097 		dev->msi.data->__domains[domid].domain = NULL;
1098 		irq_domain_remove(domain);
1099 		return false;
1100 	}
1101 
1102 	/* @bundle and @fwnode_alloced are now in use. Prevent cleanup */
1103 	retain_and_null_ptr(bundle);
1104 	retain_and_null_ptr(fwnode_alloced);
1105 	return true;
1106 }
1107 
1108 /**
1109  * msi_remove_device_irq_domain - Free a device MSI interrupt domain
1110  * @dev:	Pointer to the device
1111  * @domid:	Domain id
1112  */
1113 void msi_remove_device_irq_domain(struct device *dev, unsigned int domid)
1114 {
1115 	struct fwnode_handle *fwnode = NULL;
1116 	struct msi_domain_info *info;
1117 	struct irq_domain *domain;
1118 
1119 	guard(msi_descs_lock)(dev);
1120 	domain = msi_get_device_domain(dev, domid);
1121 	if (!domain || !irq_domain_is_msi_device(domain))
1122 		return;
1123 
1124 	dev->msi.data->__domains[domid].domain = NULL;
1125 	info = domain->host_data;
1126 
1127 	info->ops->msi_teardown(domain, info->alloc_data);
1128 
1129 	if (irq_domain_is_msi_device(domain))
1130 		fwnode = domain->fwnode;
1131 	irq_domain_remove(domain);
1132 	irq_domain_free_fwnode(fwnode);
1133 	kfree(container_of(info, struct msi_domain_template, info));
1134 }
1135 
1136 /**
1137  * msi_match_device_irq_domain - Match a device irq domain against a bus token
1138  * @dev:	Pointer to the device
1139  * @domid:	Domain id
1140  * @bus_token:	Bus token to match against the domain bus token
1141  *
1142  * Return: True if device domain exists and bus tokens match.
1143  */
1144 bool msi_match_device_irq_domain(struct device *dev, unsigned int domid,
1145 				 enum irq_domain_bus_token bus_token)
1146 {
1147 	struct msi_domain_info *info;
1148 	struct irq_domain *domain;
1149 
1150 	guard(msi_descs_lock)(dev);
1151 	domain = msi_get_device_domain(dev, domid);
1152 	if (domain && irq_domain_is_msi_device(domain)) {
1153 		info = domain->host_data;
1154 		return info->bus_token == bus_token;
1155 	}
1156 	return false;
1157 }
1158 
1159 static int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
1160 				   int nvec, msi_alloc_info_t *arg)
1161 {
1162 	struct msi_domain_info *info = domain->host_data;
1163 	struct msi_domain_ops *ops = info->ops;
1164 
1165 	return ops->msi_prepare(domain, dev, nvec, arg);
1166 }
1167 
1168 /*
1169  * Carefully check whether the device can use reservation mode. If
1170  * reservation mode is enabled then the early activation will assign a
1171  * dummy vector to the device. If the PCI/MSI device does not support
1172  * masking of the entry then this can result in spurious interrupts when
1173  * the device driver is not absolutely careful. But even then a malfunction
1174  * of the hardware could result in a spurious interrupt on the dummy vector
1175  * and render the device unusable. If the entry can be masked then the core
1176  * logic will prevent the spurious interrupt and reservation mode can be
1177  * used. For now reservation mode is restricted to PCI/MSI.
1178  */
1179 static bool msi_check_reservation_mode(struct irq_domain *domain,
1180 				       struct msi_domain_info *info,
1181 				       struct device *dev)
1182 {
1183 	struct msi_desc *desc;
1184 
1185 	switch(domain->bus_token) {
1186 	case DOMAIN_BUS_PCI_MSI:
1187 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1188 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1189 	case DOMAIN_BUS_VMD_MSI:
1190 		break;
1191 	default:
1192 		return false;
1193 	}
1194 
1195 	if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
1196 		return false;
1197 
1198 	if (info->flags & MSI_FLAG_NO_MASK)
1199 		return false;
1200 
1201 	/*
1202 	 * Checking the first MSI descriptor is sufficient. MSIX supports
1203 	 * masking and MSI does so when the can_mask attribute is set.
1204 	 */
1205 	desc = msi_first_desc(dev, MSI_DESC_ALL);
1206 	return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
1207 }
1208 
1209 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
1210 			       int allocated)
1211 {
1212 	switch(domain->bus_token) {
1213 	case DOMAIN_BUS_PCI_MSI:
1214 	case DOMAIN_BUS_PCI_DEVICE_MSI:
1215 	case DOMAIN_BUS_PCI_DEVICE_MSIX:
1216 	case DOMAIN_BUS_VMD_MSI:
1217 		if (IS_ENABLED(CONFIG_PCI_MSI))
1218 			break;
1219 		fallthrough;
1220 	default:
1221 		return -ENOSPC;
1222 	}
1223 
1224 	/* Let a failed PCI multi MSI allocation retry */
1225 	if (desc->nvec_used > 1)
1226 		return 1;
1227 
1228 	/* If there was a successful allocation let the caller know */
1229 	return allocated ? allocated : -ENOSPC;
1230 }
1231 
1232 #define VIRQ_CAN_RESERVE	0x01
1233 #define VIRQ_ACTIVATE		0x02
1234 
1235 static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflags)
1236 {
1237 	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
1238 	int ret;
1239 
1240 	if (!(vflags & VIRQ_CAN_RESERVE)) {
1241 		irqd_clr_can_reserve(irqd);
1242 
1243 		/*
1244 		 * If the interrupt is managed but no CPU is available to
1245 		 * service it, shut it down until better times. Note that
1246 		 * we only do this on the !RESERVE path as x86 (the only
1247 		 * architecture using this flag) deals with this in a
1248 		 * different way by using a catch-all vector.
1249 		 */
1250 		if ((vflags & VIRQ_ACTIVATE) &&
1251 		    irqd_affinity_is_managed(irqd) &&
1252 		    !cpumask_intersects(irq_data_get_affinity_mask(irqd),
1253 					cpu_online_mask)) {
1254 			    irqd_set_managed_shutdown(irqd);
1255 			    return 0;
1256 		    }
1257 	}
1258 
1259 	if (!(vflags & VIRQ_ACTIVATE))
1260 		return 0;
1261 
1262 	ret = irq_domain_activate_irq(irqd, vflags & VIRQ_CAN_RESERVE);
1263 	if (ret)
1264 		return ret;
1265 	/*
1266 	 * If the interrupt uses reservation mode, clear the activated bit
1267 	 * so request_irq() will assign the final vector.
1268 	 */
1269 	if (vflags & VIRQ_CAN_RESERVE)
1270 		irqd_clr_activated(irqd);
1271 	return 0;
1272 }
1273 
1274 static int populate_alloc_info(struct irq_domain *domain, struct device *dev,
1275 			       unsigned int nirqs, msi_alloc_info_t *arg)
1276 {
1277 	struct msi_domain_info *info = domain->host_data;
1278 
1279 	/*
1280 	 * If the caller has provided a template alloc info, use that. Once
1281 	 * all users of msi_create_irq_domain() have been eliminated, this
1282 	 * should be the only source of allocation information, and the
1283 	 * prepare call below should be finally removed.
1284 	 */
1285 	if (!info->alloc_data)
1286 		return msi_domain_prepare_irqs(domain, dev, nirqs, arg);
1287 
1288 	*arg = *info->alloc_data;
1289 	return 0;
1290 }
1291 
1292 static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain,
1293 				   struct msi_ctrl *ctrl)
1294 {
1295 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1296 	struct msi_domain_info *info = domain->host_data;
1297 	struct msi_domain_ops *ops = info->ops;
1298 	unsigned int vflags = 0, allocated = 0;
1299 	msi_alloc_info_t arg = { };
1300 	struct msi_desc *desc;
1301 	unsigned long idx;
1302 	int i, ret, virq;
1303 
1304 	ret = populate_alloc_info(domain, dev, ctrl->nirqs, &arg);
1305 	if (ret)
1306 		return ret;
1307 
1308 	/*
1309 	 * This flag is set by the PCI layer as we need to activate
1310 	 * the MSI entries before the PCI layer enables MSI in the
1311 	 * card. Otherwise the card latches a random msi message.
1312 	 */
1313 	if (info->flags & MSI_FLAG_ACTIVATE_EARLY)
1314 		vflags |= VIRQ_ACTIVATE;
1315 
1316 	/*
1317 	 * Interrupt can use a reserved vector and will not occupy
1318 	 * a real device vector until the interrupt is requested.
1319 	 */
1320 	if (msi_check_reservation_mode(domain, info, dev))
1321 		vflags |= VIRQ_CAN_RESERVE;
1322 
1323 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1324 		if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED))
1325 			continue;
1326 
1327 		/* This should return -ECONFUSED... */
1328 		if (WARN_ON_ONCE(allocated >= ctrl->nirqs))
1329 			return -EINVAL;
1330 
1331 		if (ops->prepare_desc)
1332 			ops->prepare_desc(domain, &arg, desc);
1333 
1334 		ops->set_desc(&arg, desc);
1335 
1336 		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
1337 					       dev_to_node(dev), &arg, false,
1338 					       desc->affinity);
1339 		if (virq < 0)
1340 			return msi_handle_pci_fail(domain, desc, allocated);
1341 
1342 		for (i = 0; i < desc->nvec_used; i++) {
1343 			irq_set_msi_desc_off(virq, i, desc);
1344 			irq_debugfs_copy_devname(virq + i, dev);
1345 			ret = msi_init_virq(domain, virq + i, vflags);
1346 			if (ret)
1347 				return ret;
1348 		}
1349 		if (info->flags & MSI_FLAG_DEV_SYSFS) {
1350 			ret = msi_sysfs_populate_desc(dev, desc);
1351 			if (ret)
1352 				return ret;
1353 		}
1354 		allocated++;
1355 	}
1356 	return 0;
1357 }
1358 
1359 static int msi_domain_alloc_simple_msi_descs(struct device *dev,
1360 					     struct msi_domain_info *info,
1361 					     struct msi_ctrl *ctrl)
1362 {
1363 	if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS))
1364 		return 0;
1365 
1366 	return msi_domain_add_simple_msi_descs(dev, ctrl);
1367 }
1368 
1369 static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
1370 {
1371 	struct msi_domain_info *info;
1372 	struct msi_domain_ops *ops;
1373 	struct irq_domain *domain;
1374 	int ret;
1375 
1376 	if (!msi_ctrl_valid(dev, ctrl))
1377 		return -EINVAL;
1378 
1379 	domain = msi_get_device_domain(dev, ctrl->domid);
1380 	if (!domain)
1381 		return -ENODEV;
1382 
1383 	info = domain->host_data;
1384 
1385 	ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl);
1386 	if (ret)
1387 		return ret;
1388 
1389 	ops = info->ops;
1390 	if (ops->domain_alloc_irqs)
1391 		return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs);
1392 
1393 	return __msi_domain_alloc_irqs(dev, domain, ctrl);
1394 }
1395 
/* Allocate and undo whatever was set up when the allocation did not return 0 */
static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl)
{
	int ret;

	ret = __msi_domain_alloc_locked(dev, ctrl);
	if (!ret)
		return 0;

	msi_domain_free_locked(dev, ctrl);
	return ret;
}
1404 
1405 /**
1406  * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain
1407  * @dev:	Pointer to device struct of the device for which the interrupts
1408  *		are allocated
1409  * @domid:	Id of the interrupt domain to operate on
1410  * @first:	First index to allocate (inclusive)
1411  * @last:	Last index to allocate (inclusive)
1412  *
1413  * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1414  * pair. Use this for MSI irqdomains which implement their own descriptor
1415  * allocation/free.
1416  *
1417  * Return: %0 on success or an error code.
1418  */
1419 int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid,
1420 				       unsigned int first, unsigned int last)
1421 {
1422 	struct msi_ctrl ctrl = {
1423 		.domid	= domid,
1424 		.first	= first,
1425 		.last	= last,
1426 		.nirqs	= last + 1 - first,
1427 	};
1428 
1429 	return msi_domain_alloc_locked(dev, &ctrl);
1430 }
1431 
1432 /**
1433  * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain
1434  * @dev:	Pointer to device struct of the device for which the interrupts
1435  *		are allocated
1436  * @domid:	Id of the interrupt domain to operate on
1437  * @first:	First index to allocate (inclusive)
1438  * @last:	Last index to allocate (inclusive)
1439  *
1440  * Return: %0 on success or an error code.
1441  */
1442 int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid,
1443 				unsigned int first, unsigned int last)
1444 {
1445 
1446 	guard(msi_descs_lock)(dev);
1447 	return msi_domain_alloc_irqs_range_locked(dev, domid, first, last);
1448 }
1449 EXPORT_SYMBOL_GPL(msi_domain_alloc_irqs_range);
1450 
1451 /**
1452  * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain
1453  *
1454  * @dev:	Pointer to device struct of the device for which the interrupts
1455  *		are allocated
1456  * @domid:	Id of the interrupt domain to operate on
1457  * @nirqs:	The number of interrupts to allocate
1458  *
1459  * This function scans all MSI descriptors of the MSI domain and allocates interrupts
1460  * for all unassigned ones. That function is to be used for MSI domain usage where
1461  * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X].
1462  *
1463  * Return: %0 on success or an error code.
1464  */
1465 int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs)
1466 {
1467 	struct msi_ctrl ctrl = {
1468 		.domid	= domid,
1469 		.first	= 0,
1470 		.last	= msi_domain_get_hwsize(dev, domid) - 1,
1471 		.nirqs	= nirqs,
1472 	};
1473 
1474 	return msi_domain_alloc_locked(dev, &ctrl);
1475 }
1476 
1477 static struct msi_map __msi_domain_alloc_irq_at(struct device *dev, unsigned int domid,
1478 						unsigned int index,
1479 						const struct irq_affinity_desc *affdesc,
1480 						union msi_instance_cookie *icookie)
1481 {
1482 	struct msi_ctrl ctrl = { .domid	= domid, .nirqs = 1, };
1483 	struct irq_domain *domain;
1484 	struct msi_map map = { };
1485 	struct msi_desc *desc;
1486 	int ret;
1487 
1488 	domain = msi_get_device_domain(dev, domid);
1489 	if (!domain) {
1490 		map.index = -ENODEV;
1491 		return map;
1492 	}
1493 
1494 	desc = msi_alloc_desc(dev, 1, affdesc);
1495 	if (!desc) {
1496 		map.index = -ENOMEM;
1497 		return map;
1498 	}
1499 
1500 	if (icookie)
1501 		desc->data.icookie = *icookie;
1502 
1503 	ret = msi_insert_desc(dev, desc, domid, index);
1504 	if (ret) {
1505 		map.index = ret;
1506 		return map;
1507 	}
1508 
1509 	ctrl.first = ctrl.last = desc->msi_index;
1510 
1511 	ret = __msi_domain_alloc_irqs(dev, domain, &ctrl);
1512 	if (ret) {
1513 		map.index = ret;
1514 		msi_domain_free_locked(dev, &ctrl);
1515 	} else {
1516 		map.index = desc->msi_index;
1517 		map.virq = desc->irq;
1518 	}
1519 	return map;
1520 }
1521 
1522 /**
1523  * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at
1524  *			     a given index - or at the next free index
1525  *
1526  * @dev:	Pointer to device struct of the device for which the interrupts
1527  *		are allocated
1528  * @domid:	Id of the interrupt domain to operate on
1529  * @index:	Index for allocation. If @index == %MSI_ANY_INDEX the allocation
1530  *		uses the next free index.
1531  * @affdesc:	Optional pointer to an interrupt affinity descriptor structure
1532  * @icookie:	Optional pointer to a domain specific per instance cookie. If
1533  *		non-NULL the content of the cookie is stored in msi_desc::data.
1534  *		Must be NULL for MSI-X allocations
1535  *
1536  * This requires a MSI interrupt domain which lets the core code manage the
1537  * MSI descriptors.
1538  *
1539  * Return: struct msi_map
1540  *
1541  *	On success msi_map::index contains the allocated index number and
1542  *	msi_map::virq the corresponding Linux interrupt number
1543  *
1544  *	On failure msi_map::index contains the error code and msi_map::virq
1545  *	is %0.
1546  */
1547 struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index,
1548 				       const struct irq_affinity_desc *affdesc,
1549 				       union msi_instance_cookie *icookie)
1550 {
1551 	guard(msi_descs_lock)(dev);
1552 	return __msi_domain_alloc_irq_at(dev, domid, index, affdesc, icookie);
1553 }
1554 
1555 /**
1556  * msi_device_domain_alloc_wired - Allocate a "wired" interrupt on @domain
1557  * @domain:	The domain to allocate on
1558  * @hwirq:	The hardware interrupt number to allocate for
1559  * @type:	The interrupt type
1560  *
1561  * This weirdness supports wire to MSI controllers like MBIGEN.
1562  *
1563  * @hwirq is the hardware interrupt number which is handed in from
1564  * irq_create_fwspec_mapping(). As the wire to MSI domain is sparse, but
1565  * sized in firmware, the hardware interrupt number cannot be used as MSI
1566  * index. For the underlying irq chip the MSI index is irrelevant and
1567  * all it needs is the hardware interrupt number.
1568  *
1569  * To handle this the MSI index is allocated with MSI_ANY_INDEX and the
1570  * hardware interrupt number is stored along with the type information in
1571  * msi_desc::cookie so the underlying interrupt chip and domain code can
1572  * retrieve it.
1573  *
1574  * Return: The Linux interrupt number (> 0) or an error code
1575  */
1576 int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq,
1577 				  unsigned int type)
1578 {
1579 	unsigned int domid = MSI_DEFAULT_DOMAIN;
1580 	union msi_instance_cookie icookie = { };
1581 	struct device *dev = domain->dev;
1582 	struct msi_map map = { };
1583 
1584 	if (WARN_ON_ONCE(!dev || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1585 		return -EINVAL;
1586 
1587 	icookie.value = ((u64)type << 32) | hwirq;
1588 
1589 	guard(msi_descs_lock)(dev);
1590 	if (WARN_ON_ONCE(msi_get_device_domain(dev, domid) != domain))
1591 		map.index = -EINVAL;
1592 	else
1593 		map = __msi_domain_alloc_irq_at(dev, domid, MSI_ANY_INDEX, NULL, &icookie);
1594 	return map.index >= 0 ? map.virq : map.index;
1595 }
1596 
1597 static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain,
1598 				   struct msi_ctrl *ctrl)
1599 {
1600 	struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store;
1601 	struct msi_domain_info *info = domain->host_data;
1602 	struct irq_data *irqd;
1603 	struct msi_desc *desc;
1604 	unsigned long idx;
1605 	int i;
1606 
1607 	xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) {
1608 		/* Only handle MSI entries which have an interrupt associated */
1609 		if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED))
1610 			continue;
1611 
1612 		/* Make sure all interrupts are deactivated */
1613 		for (i = 0; i < desc->nvec_used; i++) {
1614 			irqd = irq_domain_get_irq_data(domain, desc->irq + i);
1615 			if (irqd && irqd_is_activated(irqd))
1616 				irq_domain_deactivate_irq(irqd);
1617 		}
1618 
1619 		irq_domain_free_irqs(desc->irq, desc->nvec_used);
1620 		if (info->flags & MSI_FLAG_DEV_SYSFS)
1621 			msi_sysfs_remove_desc(dev, desc);
1622 		desc->irq = 0;
1623 	}
1624 }
1625 
1626 static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl)
1627 {
1628 	struct msi_domain_info *info;
1629 	struct msi_domain_ops *ops;
1630 	struct irq_domain *domain;
1631 
1632 	if (!msi_ctrl_valid(dev, ctrl))
1633 		return;
1634 
1635 	domain = msi_get_device_domain(dev, ctrl->domid);
1636 	if (!domain)
1637 		return;
1638 
1639 	info = domain->host_data;
1640 	ops = info->ops;
1641 
1642 	if (ops->domain_free_irqs)
1643 		ops->domain_free_irqs(domain, dev);
1644 	else
1645 		__msi_domain_free_irqs(dev, domain, ctrl);
1646 
1647 	if (info->flags & MSI_FLAG_FREE_MSI_DESCS)
1648 		msi_domain_free_descs(dev, ctrl);
1649 }
1650 
1651 /**
1652  * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain
1653  *				       associated to @dev with msi_lock held
1654  * @dev:	Pointer to device struct of the device for which the interrupts
1655  *		are freed
1656  * @domid:	Id of the interrupt domain to operate on
1657  * @first:	First index to free (inclusive)
1658  * @last:	Last index to free (inclusive)
1659  */
1660 void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid,
1661 				       unsigned int first, unsigned int last)
1662 {
1663 	struct msi_ctrl ctrl = {
1664 		.domid	= domid,
1665 		.first	= first,
1666 		.last	= last,
1667 	};
1668 	msi_domain_free_locked(dev, &ctrl);
1669 }
1670 
1671 /**
1672  * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain
1673  *				associated to @dev
1674  * @dev:	Pointer to device struct of the device for which the interrupts
1675  *		are freed
1676  * @domid:	Id of the interrupt domain to operate on
1677  * @first:	First index to free (inclusive)
1678  * @last:	Last index to free (inclusive)
1679  */
1680 void msi_domain_free_irqs_range(struct device *dev, unsigned int domid,
1681 				unsigned int first, unsigned int last)
1682 {
1683 	guard(msi_descs_lock)(dev);
1684 	msi_domain_free_irqs_range_locked(dev, domid, first, last);
1685 }
1686 EXPORT_SYMBOL_GPL(msi_domain_free_irqs_all);
1687 
1688 /**
1689  * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain
1690  *				     associated to a device
1691  * @dev:	Pointer to device struct of the device for which the interrupts
1692  *		are freed
1693  * @domid:	The id of the domain to operate on
1694  *
1695  * Must be invoked from within a msi_lock_descs() / msi_unlock_descs()
1696  * pair. Use this for MSI irqdomains which implement their own vector
1697  * allocation.
1698  */
1699 void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid)
1700 {
1701 	msi_domain_free_irqs_range_locked(dev, domid, 0,
1702 					  msi_domain_get_hwsize(dev, domid) - 1);
1703 }
1704 
1705 /**
1706  * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain
1707  *			      associated to a device
1708  * @dev:	Pointer to device struct of the device for which the interrupts
1709  *		are freed
1710  * @domid:	The id of the domain to operate on
1711  */
1712 void msi_domain_free_irqs_all(struct device *dev, unsigned int domid)
1713 {
1714 	guard(msi_descs_lock)(dev);
1715 	msi_domain_free_irqs_all_locked(dev, domid);
1716 }
1717 
1718 /**
1719  * msi_device_domain_free_wired - Free a wired interrupt in @domain
1720  * @domain:	The domain to free the interrupt on
1721  * @virq:	The Linux interrupt number to free
1722  *
1723  * This is the counterpart of msi_device_domain_alloc_wired() for the
1724  * weird wired to MSI converting domains.
1725  */
1726 void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq)
1727 {
1728 	struct msi_desc *desc = irq_get_msi_desc(virq);
1729 	struct device *dev = domain->dev;
1730 
1731 	if (WARN_ON_ONCE(!dev || !desc || domain->bus_token != DOMAIN_BUS_WIRED_TO_MSI))
1732 		return;
1733 
1734 	guard(msi_descs_lock)(dev);
1735 	if (WARN_ON_ONCE(msi_get_device_domain(dev, MSI_DEFAULT_DOMAIN) != domain))
1736 		return;
1737 	msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, desc->msi_index,
1738 					  desc->msi_index);
1739 }
1740 
1741 /**
1742  * msi_get_domain_info - Get the MSI interrupt domain info for @domain
1743  * @domain:	The interrupt domain to retrieve data from
1744  *
1745  * Return: the pointer to the msi_domain_info stored in @domain->host_data.
1746  */
1747 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
1748 {
1749 	return (struct msi_domain_info *)domain->host_data;
1750 }
1751 
1752 /**
1753  * msi_device_has_isolated_msi - True if the device has isolated MSI
1754  * @dev: The device to check
1755  *
1756  * Isolated MSI means that HW modeled by an irq_domain on the path from the
1757  * initiating device to the CPU will validate that the MSI message specifies an
1758  * interrupt number that the device is authorized to trigger. This must block
1759  * devices from triggering interrupts they are not authorized to trigger.
1760  * Currently authorization means the MSI vector is one assigned to the device.
1761  *
 * This is interesting for securing VFIO use cases where a rogue MSI (eg created
1763  * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
1764  * impact outside its security domain, eg userspace triggering interrupts on
1765  * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
1766  * triggering interrupts on another VM.
1767  */
1768 bool msi_device_has_isolated_msi(struct device *dev)
1769 {
1770 	struct irq_domain *domain = dev_get_msi_domain(dev);
1771 
1772 	for (; domain; domain = domain->parent)
1773 		if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
1774 			return true;
1775 	return arch_is_isolated_msi();
1776 }
1777 EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);
1778