xref: /linux/drivers/pci/msi/msi.c (revision 1504b6f97bad166b484d6f27dc99746fdca5f467)
// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 * Copyright (C) 2016 Christoph Hellwig.
 */
#include <linux/err.h>
#include <linux/export.h>
#include <linux/irq.h>

#include "../pci.h"
#include "msi.h"

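/*
 * Global MSI policy state: pci_msi_enable is cleared by pci_no_msi() for
 * the "pci=nomsi" command-line option; pci_msi_ignore_mask is set by
 * platform code (notably Xen) to keep the kernel from touching the
 * per-vector mask bits.
 */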
static int pci_msi_enable = 1;
int pci_msi_ignore_mask;

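/*
 * Update the per-vector mask bits of an MSI capability in config space:
 * @clear names vectors to unmask, @set names vectors to mask.  The cached
 * desc->pci.msi_mask is kept in sync under the device's msi_lock.
 */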
static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
{
	raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
	unsigned long flags;

	if (!desc->pci.msi_attrib.can_mask)
		return;

	raw_spin_lock_irqsave(lock, flags);
	desc->pci.msi_mask &= ~clear;
	desc->pci.msi_mask |= set;
	pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
			       desc->pci.msi_mask);
	raw_spin_unlock_irqrestore(lock, flags);
}

static inline void pci_msi_mask(struct msi_desc *desc, u32 mask)
{
	pci_msi_update_mask(desc, 0, mask);
}

static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask)
{
	pci_msi_update_mask(desc, mask, 0);
}

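/*
 * Each MSI-X table entry is PCI_MSIX_ENTRY_SIZE (16) bytes: message
 * address low/high, message data and vector control, in that order.
 */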
static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
	return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE;
}

/*
 * This internal function does not flush PCI writes to the device.  All
 * users must ensure that they read from the device before either assuming
 * that the device state is up to date, or returning out of this file.
 * It does not affect the msi_desc::msix_ctrl cache either. Use with care!
 */
static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
{
	void __iomem *desc_addr = pci_msix_desc_addr(desc);

	if (desc->pci.msi_attrib.can_mask)
		writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

static inline void pci_msix_mask(struct msi_desc *desc)
{
	desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
	/* Flush write to device */
	readl(desc->pci.mask_base);
}

static inline void pci_msix_unmask(struct msi_desc *desc)
{
	desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
}

static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
{
	if (desc->pci.msi_attrib.is_msix)
		pci_msix_mask(desc);
	else
		pci_msi_mask(desc, mask);
}

static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
{
	if (desc->pci.msi_attrib.is_msix)
		pci_msix_unmask(desc);
	else
		pci_msi_unmask(desc, mask);
}

/**
 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
 * @data:	pointer to irqdata associated with that interrupt
 */
void pci_msi_mask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	__pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_mask_irq);

/**
 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
 * @data:	pointer to irqdata associated with that interrupt
 */
void pci_msi_unmask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	__pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
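
/*
 * Illustrative sketch (not part of this file): an MSI irqdomain typically
 * wires the two callbacks above into its struct irq_chip.  "my_msi_chip"
 * is a hypothetical name:
 *
 *	static struct irq_chip my_msi_chip = {
 *		.name		= "my-MSI",
 *		.irq_mask	= pci_msi_mask_irq,
 *		.irq_unmask	= pci_msi_unmask_irq,
 *	};
 */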

void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	BUG_ON(dev->current_state != PCI_D0);

	if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);

		if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
			return;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		if (entry->pci.msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}

void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
		/* Don't touch the hardware now */
	} else if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);
		u32 ctrl = entry->pci.msix_ctrl;
		bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);

		if (entry->pci.msi_attrib.is_virtual)
			goto skip;

		/*
		 * The specification mandates that the entry is masked
		 * when the message is modified:
		 *
		 * "If software changes the Address or Data value of an
		 * entry while the entry is unmasked, the result is
		 * undefined."
		 */
		if (unmasked)
			pci_msix_write_vector_ctrl(entry, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);

		if (unmasked)
			pci_msix_write_vector_ctrl(entry, ctrl);

		/* Ensure that the writes are visible in the device */
		readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 msgctl;

		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->pci.msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->pci.msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
		/* Ensure that the writes are visible in the device */
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	}

skip:
	entry->msg = *msg;

	if (entry->write_msi_msg)
		entry->write_msi_msg(entry, entry->write_msi_msg_data);
}

void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__pci_write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(pci_write_msi_msg);

static void free_msi_irqs(struct pci_dev *dev)
{
	pci_msi_teardown_msi_irqs(dev);

	if (dev->msix_base) {
		iounmap(dev->msix_base);
		dev->msix_base = NULL;
	}
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void pci_msi_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

/*
 * Architecture override returns true when the PCI MSI message should be
 * written by the generic restore function.
 */
bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	return true;
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	u16 control;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 0);
	if (arch_restore_msi_irqs(dev))
		__pci_write_msi_msg(entry, &entry->msg);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	pci_msi_update_mask(entry, 0, 0);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
{
	u16 ctrl;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	ctrl &= ~clear;
	ctrl |= set;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	bool write_msg;

	if (!dev->msix_enabled)
		return;

	/* route the table */
	pci_intx_for_msi(dev, 0);
	pci_msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

	write_msg = arch_restore_msi_irqs(dev);

	msi_lock_descs(&dev->dev);
	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (write_msg)
			__pci_write_msi_msg(entry, &entry->msg);
		pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
	}
	msi_unlock_descs(&dev->dev);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}

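/*
 * Restore MSI/MSI-X configuration from the cached msi_desc state.
 * Typically invoked via pci_restore_state() when a device is powered back
 * up and its config space must be reprogrammed.
 */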
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

static void pcim_msi_release(void *pcidev)
{
	struct pci_dev *dev = pcidev;

	dev->is_msi_managed = false;
	pci_free_irq_vectors(dev);
}

/*
 * Needs to be separate from pcim_release to prevent an ordering problem
 * vs. msi_device_data_release() in the MSI core code.
 */
static int pcim_setup_msi_release(struct pci_dev *dev)
{
	int ret;

	if (!pci_is_managed(dev) || dev->is_msi_managed)
		return 0;

	ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
	if (!ret)
		dev->is_msi_managed = true;
	return ret;
}

/*
 * Ordering vs. devres: msi device data has to be installed first so that
 * pcim_msi_release() is invoked before it on device release.
 */
static int pci_setup_msi_context(struct pci_dev *dev)
{
	int ret = msi_setup_device_data(&dev->dev);

	if (!ret)
		ret = pcim_setup_msi_release(dev);
	return ret;
}

static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
			      struct irq_affinity_desc *masks)
{
	struct msi_desc desc;
	u16 control;

	/* MSI Entry Initialization */
	memset(&desc, 0, sizeof(desc));

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	/* Lies, damned lies, and MSIs */
	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
		control |= PCI_MSI_FLAGS_MASKBIT;
	/* Respect XEN's mask disabling */
	if (pci_msi_ignore_mask)
		control &= ~PCI_MSI_FLAGS_MASKBIT;

	desc.nvec_used			= nvec;
	desc.pci.msi_attrib.is_64	= !!(control & PCI_MSI_FLAGS_64BIT);
	desc.pci.msi_attrib.can_mask	= !!(control & PCI_MSI_FLAGS_MASKBIT);
	desc.pci.msi_attrib.default_irq	= dev->irq;
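	/*
	 * multi_cap/multiple hold log2 of the vector count, matching the
	 * encoding of the Multiple Message Capable/Enable fields in the
	 * MSI Message Control register.
	 */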
	desc.pci.msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
	desc.pci.msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
	desc.affinity			= masks;

	if (control & PCI_MSI_FLAGS_64BIT)
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (desc.pci.msi_attrib.can_mask)
		pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);

	return msi_add_msi_desc(&dev->dev, &desc);
}

static int msi_verify_entries(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->no_64bit_msi)
		return 0;

	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (entry->msg.address_hi) {
			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
				entry->msg.address_hi, entry->msg.address_lo);
			break;
		}
	}
	return !entry ? 0 : -EIO;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI IRQ.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec,
			       struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	struct msi_desc *entry;
	int ret;

	/*
	 * Disable MSI during setup in the hardware, but mark it enabled
	 * so that setup code can evaluate it.
	 */
	pci_msi_set_enable(dev, 0);
	dev->msi_enabled = 1;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	msi_lock_descs(&dev->dev);
	ret = msi_setup_msi_desc(dev, nvec, masks);
	if (ret)
		goto fail;

	/* All MSIs are unmasked by default; mask them all */
	entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	pci_msi_mask(entry, msi_multi_mask(entry));

	/* Configure MSI capability structure */
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret)
		goto err;

	ret = msi_verify_entries(dev);
	if (ret)
		goto err;

	/* Set MSI enabled bits	*/
	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 1);

	pcibios_free_irq(dev);
	dev->irq = entry->irq;
	goto unlock;

err:
	pci_msi_unmask(entry, msi_multi_mask(entry));
	free_msi_irqs(dev);
fail:
	dev->msi_enabled = 0;
unlock:
	msi_unlock_descs(&dev->dev);
	kfree(masks);
	return ret;
}

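/*
 * The MSI-X Table Offset/BIR register encodes which BAR holds the vector
 * table (the low three bits, PCI_MSIX_TABLE_BIR) and the table's offset
 * within that BAR (the remaining bits, PCI_MSIX_TABLE_OFFSET).
 */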
static void __iomem *msix_map_region(struct pci_dev *dev,
				     unsigned int nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	unsigned long flags;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	flags = pci_resource_flags(dev, bir);
	if (!flags || (flags & IORESOURCE_UNSET))
		return NULL;

	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

static int msix_setup_msi_descs(struct pci_dev *dev, void __iomem *base,
				struct msix_entry *entries, int nvec,
				struct irq_affinity_desc *masks)
{
	int ret = 0, i, vec_count = pci_msix_vec_count(dev);
	struct irq_affinity_desc *curmsk;
	struct msi_desc desc;
	void __iomem *addr;

	memset(&desc, 0, sizeof(desc));

	desc.nvec_used			= 1;
	desc.pci.msi_attrib.is_msix	= 1;
	desc.pci.msi_attrib.is_64	= 1;
	desc.pci.msi_attrib.default_irq	= dev->irq;
	desc.pci.mask_base		= base;

	for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
		desc.msi_index = entries ? entries[i].entry : i;
		desc.affinity = masks ? curmsk : NULL;
		desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;
		desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
					       !desc.pci.msi_attrib.is_virtual;

		if (desc.pci.msi_attrib.can_mask) {
			addr = pci_msix_desc_addr(&desc);
			desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
		}

		ret = msi_add_msi_desc(&dev->dev, &desc);
		if (ret)
			break;
	}
	return ret;
}

static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
{
	struct msi_desc *desc;

	if (entries) {
		msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
			entries->vector = desc->irq;
			entries++;
		}
	}
}

static void msix_mask_all(void __iomem *base, int tsize)
{
	u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
	int i;

	if (pci_msi_ignore_mask)
		return;

	for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
		writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
}

static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base,
				 struct msix_entry *entries, int nvec,
				 struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	int ret;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	msi_lock_descs(&dev->dev);
	ret = msix_setup_msi_descs(dev, base, entries, nvec, masks);
	if (ret)
		goto out_free;

	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto out_free;

	/* Check if all MSI entries honor device restrictions */
	ret = msi_verify_entries(dev);
	if (ret)
		goto out_free;

	msix_update_entries(dev, entries);
	goto out_unlock;

out_free:
	free_msi_irqs(dev);
out_unlock:
	msi_unlock_descs(&dev->dev);
	kfree(masks);
	return ret;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 * @affd: Optional pointer to enable automatic affinity assignment
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X interrupts. A return of zero indicates
 * successful setup of the requested MSI-X entries with allocated IRQs;
 * a negative errno indicates an error.
 **/
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity *affd)
{
	void __iomem *base;
	int ret, tsize;
	u16 control;

	/*
	 * Some devices require MSI-X to be enabled before the MSI-X
	 * registers can be accessed.  Mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
				    PCI_MSIX_FLAGS_ENABLE);

	/* Mark it enabled so setup functions can query it */
	dev->msix_enabled = 1;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	/* Request & Map MSI-X table region */
	tsize = msix_table_size(control);
	base = msix_map_region(dev, tsize);
	if (!base) {
		ret = -ENOMEM;
		goto out_disable;
	}

	dev->msix_base = base;

	ret = msix_setup_interrupts(dev, base, entries, nvec, affd);
	if (ret)
		goto out_disable;

	/* Disable INTx */
	pci_intx_for_msi(dev, 0);

	/*
	 * Ensure that all table entries are masked to prevent
	 * stale entries from firing in a crash kernel.
	 *
	 * Done late to deal with a broken Marvell NVMe device
	 * which takes the MSI-X mask bits into account even
	 * when MSI-X is disabled, which prevents MSI delivery.
	 */
	msix_mask_all(base, tsize);
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);

	pcibios_free_irq(dev);
	return 0;

out_disable:
	dev->msix_enabled = 0;
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);

	return ret;
}

/**
 * pci_msi_supported - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested
 *
 * Look at global flags, the device itself, and its parent buses to
 * determine whether MSI/MSI-X is supported for the device.
 * Return 1 if supported, 0 otherwise.
 **/
static int pci_msi_supported(struct pci_dev *dev, int nvec)
{
	struct pci_bus *bus;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable)
		return 0;

	if (!dev || dev->no_msi)
		return 0;

	/*
	 * You can't ask for zero or fewer MSIs to be configured:
	 *  a) it's stupid
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return 0;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 *
	 * The NO_MSI flag can either be set directly by:
	 * - arch-specific PCI host bus controller drivers (deprecated)
	 * - quirks for specific PCI bridges
	 *
	 * or indirectly by platform-specific PCI host bridge drivers by
	 * advertising the 'msi_domain' property, which results in
	 * the NO_MSI flag when no MSI domain is found for this bridge
	 * at probe time.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return 0;

	return 1;
}

/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * This function returns the number of MSI vectors a device requested via
 * its Multiple Message Capable register. It returns a negative errno if the
 * device is not capable of sending MSI interrupts. Otherwise, the call
 * succeeds and returns a power of two, up to a maximum of 2^5 (32),
 * according to the MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	int ret;
	u16 msgctl;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);

	return ret;
}
EXPORT_SYMBOL(pci_msi_vec_count);
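
/*
 * Example: a Message Control value whose Multiple Message Capable field
 * reads 3 advertises 1 << 3 = 8 vectors; pci_msi_vec_count() returns 8.
 */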

static void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device with MSI unmasked as its initial state */
	desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	if (!WARN_ON_ONCE(!desc)) {
		pci_msi_unmask(desc, msi_multi_mask(desc));

		/* Restore dev->irq to its default pin-assertion IRQ */
		dev->irq = desc->pci.msi_attrib.default_irq;
	}
	pcibios_alloc_irq(dev);
}

void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	msi_lock_descs(&dev->dev);
	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
	msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msi);

/**
 * pci_msix_vec_count - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 *
 * This function returns the number of the device's MSI-X table entries and
 * therefore the number of MSI-X vectors the device is capable of sending.
 * It returns a negative errno if the device is not capable of sending MSI-X
 * interrupts.
 **/
int pci_msix_vec_count(struct pci_dev *dev)
{
	u16 control;

	if (!dev->msix_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	return msix_table_size(control);
}
EXPORT_SYMBOL(pci_msix_vec_count);

static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
			     int nvec, struct irq_affinity *affd, int flags)
{
	int nr_entries;
	int i, j;

	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
		return nr_entries;

	if (entries) {
		/* Check for any invalid entries */
		for (i = 0; i < nvec; i++) {
			if (entries[i].entry >= nr_entries)
				return -EINVAL;		/* invalid entry */
			for (j = i + 1; j < nvec; j++) {
				if (entries[i].entry == entries[j].entry)
					return -EINVAL;	/* duplicate entry */
			}
		}
	}

	/* Check whether the driver already requested MSI IRQs */
	if (dev->msi_enabled) {
		pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	return msix_capability_init(dev, entries, nvec, affd);
}

static void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	if (pci_dev_is_disconnected(dev)) {
		dev->msix_enabled = 0;
		return;
	}

	/* Return the device with MSI-X masked as its initial state */
	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
		pci_msix_mask(desc);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
	pcibios_alloc_irq(dev);
}

void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	msi_lock_descs(&dev->dev);
	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
	msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msix);

static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
				  struct irq_affinity *affd)
{
	int nvec;
	int rc;

	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	/* Check whether driver already requested MSI-X IRQs */
	if (dev->msix_enabled) {
		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msi_enabled))
		return -EINVAL;

	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	if (nvec < minvec)
		return -ENOSPC;

	if (nvec > maxvec)
		nvec = maxvec;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

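		/*
		 * A positive return value reports how many vectors could
		 * have been allocated instead; retry with that smaller
		 * count on the next loop iteration.
		 */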
		rc = msi_capability_init(dev, nvec, affd);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}

/* deprecated, don't use */
int pci_enable_msi(struct pci_dev *dev)
{
	int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
	if (rc < 0)
		return rc;
	return 0;
}
EXPORT_SYMBOL(pci_enable_msi);

static int __pci_enable_msix_range(struct pci_dev *dev,
				   struct msix_entry *entries, int minvec,
				   int maxvec, struct irq_affinity *affd,
				   int flags)
{
	int rc, nvec = maxvec;

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msix_enabled))
		return -EINVAL;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		nvec = rc;
	}
}

/**
 * pci_enable_msix_range - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @minvec: minimum number of MSI-X IRQs requested
 * @maxvec: maximum number of MSI-X IRQs requested
 *
 * Set up the MSI-X capability structure of the device function with as many
 * interrupts as possible in the range between @minvec and @maxvec. It
 * returns a negative errno if an error occurs; on success it returns the
 * actual number of MSI-X interrupts allocated, which indicates successful
 * configuration of the MSI-X capability structure.
 **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
		int minvec, int maxvec)
{
	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
}
EXPORT_SYMBOL(pci_enable_msix_range);

/**
 * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
 * @dev:		PCI device to operate on
 * @min_vecs:		minimum number of vectors required (must be >= 1)
 * @max_vecs:		maximum (desired) number of vectors
 * @flags:		flags or quirks for the allocation
 * @affd:		optional description of the affinity requirements
 *
 * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
 * vectors if available, and fall back to a single legacy vector if neither
 * is available.  Return the number of vectors allocated (which might be
 * smaller than @max_vecs) if successful, or a negative error code on error.
 * If fewer than @min_vecs interrupt vectors are available for @dev the
 * function fails with -ENOSPC.
 *
 * To get the Linux IRQ number used for a vector that can be passed to
 * request_irq() use the pci_irq_vector() helper.
 */
int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
				   unsigned int max_vecs, unsigned int flags,
				   struct irq_affinity *affd)
{
	struct irq_affinity msi_default_affd = {0};
	int nvecs = -ENOSPC;

	if (flags & PCI_IRQ_AFFINITY) {
		if (!affd)
			affd = &msi_default_affd;
	} else {
		if (WARN_ON(affd))
			affd = NULL;
	}

	if (flags & PCI_IRQ_MSIX) {
		nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
						affd, flags);
		if (nvecs > 0)
			return nvecs;
	}

	if (flags & PCI_IRQ_MSI) {
		nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
		if (nvecs > 0)
			return nvecs;
	}

	/* use legacy IRQ if allowed */
	if (flags & PCI_IRQ_LEGACY) {
		if (min_vecs == 1 && dev->irq) {
			/*
			 * Invoke the affinity spreading logic to ensure that
			 * the device driver can adjust queue configuration
			 * for the single interrupt case.
			 */
			if (affd)
				irq_create_affinity_masks(1, affd);
			pci_intx(dev, 1);
			return 1;
		}
	}

	return nvecs;
}
EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
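
/*
 * Illustrative driver-side sketch (hypothetical handler/cookie names):
 * allocate vectors, then map each vector index to a Linux IRQ number for
 * request_irq():
 *
 *	nvec = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_MSIX | PCI_IRQ_MSI);
 *	if (nvec < 0)
 *		return nvec;
 *	for (i = 0; i < nvec; i++)
 *		err = request_irq(pci_irq_vector(pdev, i), my_handler, 0,
 *				  "my_driver", my_dev);
 */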

/**
 * pci_free_irq_vectors - free previously allocated IRQs for a device
 * @dev:		PCI device to operate on
 *
 * Undoes the allocations and enabling in pci_alloc_irq_vectors().
 */
void pci_free_irq_vectors(struct pci_dev *dev)
{
	pci_disable_msix(dev);
	pci_disable_msi(dev);
}
EXPORT_SYMBOL(pci_free_irq_vectors);

/**
 * pci_irq_vector - return Linux IRQ number of a device vector
 * @dev:	PCI device to operate on
 * @nr:		Interrupt vector index (0-based)
 *
 * @nr has the following meanings depending on the interrupt mode:
 *   MSI-X:	The index in the MSI-X vector table
 *   MSI:	The index of the enabled MSI vectors
 *   INTx:	Must be 0
 *
 * Return: The Linux interrupt number or -EINVAL if @nr is out of range.
 */
int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
{
	unsigned int irq;

	if (!dev->msi_enabled && !dev->msix_enabled)
		return !nr ? dev->irq : -EINVAL;

	irq = msi_get_virq(&dev->dev, nr);
	return irq ? irq : -EINVAL;
}
EXPORT_SYMBOL(pci_irq_vector);

/**
 * pci_irq_get_affinity - return the affinity of a particular MSI vector
 * @dev:	PCI device to operate on
 * @nr:		device-relative interrupt vector index (0-based).
 *
 * @nr has the following meanings depending on the interrupt mode:
 *   MSI-X:	The index in the MSI-X vector table
 *   MSI:	The index of the enabled MSI vectors
 *   INTx:	Must be 0
 *
 * Return: A cpumask pointer or NULL if @nr is out of range
 */
const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
{
	int idx, irq = pci_irq_vector(dev, nr);
	struct msi_desc *desc;

	if (WARN_ON_ONCE(irq <= 0))
		return NULL;

	desc = irq_get_msi_desc(irq);
	/* Non-MSI does not have the information handy */
	if (!desc)
		return cpu_possible_mask;

	/* MSI[X] interrupts can be allocated without affinity descriptor */
	if (!desc->affinity)
		return NULL;

	/*
	 * MSI has a mask array in the descriptor.
	 * MSI-X has a single mask.
	 */
	idx = dev->msi_enabled ? nr : 0;
	return &desc->affinity[idx].mask;
}
EXPORT_SYMBOL(pci_irq_get_affinity);

struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
	return to_pci_dev(desc->dev);
}
EXPORT_SYMBOL(msi_desc_to_pci_dev);

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);
1149