1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * PCI Message Signaled Interrupt (MSI) 4 * 5 * Copyright (C) 2003-2004 Intel 6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) 7 * Copyright (C) 2016 Christoph Hellwig. 8 */ 9 #include <linux/bitfield.h> 10 #include <linux/err.h> 11 #include <linux/export.h> 12 #include <linux/irq.h> 13 14 #include "../pci.h" 15 #include "msi.h" 16 17 int pci_msi_enable = 1; 18 int pci_msi_ignore_mask; 19 20 /** 21 * pci_msi_supported - check whether MSI may be enabled on a device 22 * @dev: pointer to the pci_dev data structure of MSI device function 23 * @nvec: how many MSIs have been requested? 24 * 25 * Look at global flags, the device itself, and its parent buses 26 * to determine if MSI/-X are supported for the device. If MSI/-X is 27 * supported return 1, else return 0. 28 **/ 29 static int pci_msi_supported(struct pci_dev *dev, int nvec) 30 { 31 struct pci_bus *bus; 32 33 /* MSI must be globally enabled and supported by the device */ 34 if (!pci_msi_enable) 35 return 0; 36 37 if (!dev || dev->no_msi) 38 return 0; 39 40 /* 41 * You can't ask to have 0 or less MSIs configured. 42 * a) it's stupid .. 43 * b) the list manipulation code assumes nvec >= 1. 44 */ 45 if (nvec < 1) 46 return 0; 47 48 /* 49 * Any bridge which does NOT route MSI transactions from its 50 * secondary bus to its primary bus must set NO_MSI flag on 51 * the secondary pci_bus. 52 * 53 * The NO_MSI flag can either be set directly by: 54 * - arch-specific PCI host bus controller drivers (deprecated) 55 * - quirks for specific PCI bridges 56 * 57 * or indirectly by platform-specific PCI host bridge drivers by 58 * advertising the 'msi_domain' property, which results in 59 * the NO_MSI flag when no MSI domain is found for this bridge 60 * at probe time. 61 */ 62 for (bus = dev->bus; bus; bus = bus->parent) 63 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) 64 return 0; 65 66 return 1; 67 } 68 69 static void pcim_msi_release(void *pcidev) 70 { 71 struct pci_dev *dev = pcidev; 72 73 dev->is_msi_managed = false; 74 pci_free_irq_vectors(dev); 75 } 76 77 /* 78 * Needs to be separate from pcim_release to prevent an ordering problem 79 * vs. msi_device_data_release() in the MSI core code. 80 */ 81 static int pcim_setup_msi_release(struct pci_dev *dev) 82 { 83 int ret; 84 85 if (!pci_is_managed(dev) || dev->is_msi_managed) 86 return 0; 87 88 ret = devm_add_action(&dev->dev, pcim_msi_release, dev); 89 if (!ret) 90 dev->is_msi_managed = true; 91 return ret; 92 } 93 94 /* 95 * Ordering vs. devres: msi device data has to be installed first so that 96 * pcim_msi_release() is invoked before it on device release. 97 */ 98 static int pci_setup_msi_context(struct pci_dev *dev) 99 { 100 int ret = msi_setup_device_data(&dev->dev); 101 102 if (!ret) 103 ret = pcim_setup_msi_release(dev); 104 return ret; 105 } 106 107 /* 108 * Helper functions for mask/unmask and MSI message handling 109 */ 110 111 void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) 112 { 113 raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock; 114 unsigned long flags; 115 116 if (!desc->pci.msi_attrib.can_mask) 117 return; 118 119 raw_spin_lock_irqsave(lock, flags); 120 desc->pci.msi_mask &= ~clear; 121 desc->pci.msi_mask |= set; 122 pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos, 123 desc->pci.msi_mask); 124 raw_spin_unlock_irqrestore(lock, flags); 125 } 126 127 /** 128 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts 129 * @data: pointer to irqdata associated to that interrupt 130 */ 131 void pci_msi_mask_irq(struct irq_data *data) 132 { 133 struct msi_desc *desc = irq_data_get_msi_desc(data); 134 135 __pci_msi_mask_desc(desc, BIT(data->irq - desc->irq)); 136 } 137 EXPORT_SYMBOL_GPL(pci_msi_mask_irq); 138 139 /** 140 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts 141 * @data: pointer to irqdata associated to that interrupt 142 */ 143 void pci_msi_unmask_irq(struct irq_data *data) 144 { 145 struct msi_desc *desc = irq_data_get_msi_desc(data); 146 147 __pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq)); 148 } 149 EXPORT_SYMBOL_GPL(pci_msi_unmask_irq); 150 151 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 152 { 153 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 154 155 BUG_ON(dev->current_state != PCI_D0); 156 157 if (entry->pci.msi_attrib.is_msix) { 158 void __iomem *base = pci_msix_desc_addr(entry); 159 160 if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual)) 161 return; 162 163 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); 164 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); 165 msg->data = readl(base + PCI_MSIX_ENTRY_DATA); 166 } else { 167 int pos = dev->msi_cap; 168 u16 data; 169 170 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 171 &msg->address_lo); 172 if (entry->pci.msi_attrib.is_64) { 173 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 174 &msg->address_hi); 175 pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data); 176 } else { 177 msg->address_hi = 0; 178 pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data); 179 } 180 msg->data = data; 181 } 182 } 183 184 static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc, 185 struct msi_msg *msg) 186 { 187 int pos = dev->msi_cap; 188 u16 msgctl; 189 190 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 191 msgctl &= ~PCI_MSI_FLAGS_QSIZE; 192 msgctl |= FIELD_PREP(PCI_MSI_FLAGS_QSIZE, desc->pci.msi_attrib.multiple); 193 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); 194 195 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo); 196 if (desc->pci.msi_attrib.is_64) { 197 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, msg->address_hi); 198 pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data); 199 } else { 200 pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data); 201 } 202 /* Ensure that the writes are visible in the device */ 203 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 204 } 205 206 static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg) 207 { 208 void __iomem *base = pci_msix_desc_addr(desc); 209 u32 ctrl = desc->pci.msix_ctrl; 210 bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT); 211 212 if (desc->pci.msi_attrib.is_virtual) 213 return; 214 /* 215 * The specification mandates that the entry is masked 216 * when the message is modified: 217 * 218 * "If software changes the Address or Data value of an 219 * entry while the entry is unmasked, the result is 220 * undefined." 221 */ 222 if (unmasked) 223 pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT); 224 225 writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); 226 writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); 227 writel(msg->data, base + PCI_MSIX_ENTRY_DATA); 228 229 if (unmasked) 230 pci_msix_write_vector_ctrl(desc, ctrl); 231 232 /* Ensure that the writes are visible in the device */ 233 readl(base + PCI_MSIX_ENTRY_DATA); 234 } 235 236 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 237 { 238 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 239 240 if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) { 241 /* Don't touch the hardware now */ 242 } else if (entry->pci.msi_attrib.is_msix) { 243 pci_write_msg_msix(entry, msg); 244 } else { 245 pci_write_msg_msi(dev, entry, msg); 246 } 247 248 entry->msg = *msg; 249 250 if (entry->write_msi_msg) 251 entry->write_msi_msg(entry, entry->write_msi_msg_data); 252 } 253 254 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) 255 { 256 struct msi_desc *entry = irq_get_msi_desc(irq); 257 258 __pci_write_msi_msg(entry, msg); 259 } 260 EXPORT_SYMBOL_GPL(pci_write_msi_msg); 261 262 263 /* PCI/MSI specific functionality */ 264 265 static void pci_intx_for_msi(struct pci_dev *dev, int enable) 266 { 267 if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) 268 pci_intx(dev, enable); 269 } 270 271 static void pci_msi_set_enable(struct pci_dev *dev, int enable) 272 { 273 u16 control; 274 275 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 276 control &= ~PCI_MSI_FLAGS_ENABLE; 277 if (enable) 278 control |= PCI_MSI_FLAGS_ENABLE; 279 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 280 } 281 282 static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, 283 struct irq_affinity_desc *masks) 284 { 285 struct msi_desc desc; 286 u16 control; 287 288 /* MSI Entry Initialization */ 289 memset(&desc, 0, sizeof(desc)); 290 291 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 292 /* Lies, damned lies, and MSIs */ 293 if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) 294 control |= PCI_MSI_FLAGS_MASKBIT; 295 /* Respect XEN's mask disabling */ 296 if (pci_msi_ignore_mask) 297 control &= ~PCI_MSI_FLAGS_MASKBIT; 298 299 desc.nvec_used = nvec; 300 desc.pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); 301 desc.pci.msi_attrib.can_mask = !!(control & PCI_MSI_FLAGS_MASKBIT); 302 desc.pci.msi_attrib.default_irq = dev->irq; 303 desc.pci.msi_attrib.multi_cap = FIELD_GET(PCI_MSI_FLAGS_QMASK, control); 304 desc.pci.msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); 305 desc.affinity = masks; 306 307 if (control & PCI_MSI_FLAGS_64BIT) 308 desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64; 309 else 310 desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32; 311 312 /* Save the initial mask status */ 313 if (desc.pci.msi_attrib.can_mask) 314 pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask); 315 316 return msi_insert_msi_desc(&dev->dev, &desc); 317 } 318 319 static int msi_verify_entries(struct pci_dev *dev) 320 { 321 struct msi_desc *entry; 322 323 if (!dev->no_64bit_msi) 324 return 0; 325 326 msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { 327 if (entry->msg.address_hi) { 328 pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n", 329 entry->msg.address_hi, entry->msg.address_lo); 330 break; 331 } 332 } 333 return !entry ? 0 : -EIO; 334 } 335 336 /** 337 * msi_capability_init - configure device's MSI capability structure 338 * @dev: pointer to the pci_dev data structure of MSI device function 339 * @nvec: number of interrupts to allocate 340 * @affd: description of automatic IRQ affinity assignments (may be %NULL) 341 * 342 * Setup the MSI capability structure of the device with the requested 343 * number of interrupts. A return value of zero indicates the successful 344 * setup of an entry with the new MSI IRQ. A negative return value indicates 345 * an error, and a positive return value indicates the number of interrupts 346 * which could have been allocated. 347 */ 348 static int msi_capability_init(struct pci_dev *dev, int nvec, 349 struct irq_affinity *affd) 350 { 351 struct irq_affinity_desc *masks = NULL; 352 struct msi_desc *entry; 353 int ret; 354 355 /* Reject multi-MSI early on irq domain enabled architectures */ 356 if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) 357 return 1; 358 359 /* 360 * Disable MSI during setup in the hardware, but mark it enabled 361 * so that setup code can evaluate it. 362 */ 363 pci_msi_set_enable(dev, 0); 364 dev->msi_enabled = 1; 365 366 if (affd) 367 masks = irq_create_affinity_masks(nvec, affd); 368 369 msi_lock_descs(&dev->dev); 370 ret = msi_setup_msi_desc(dev, nvec, masks); 371 if (ret) 372 goto fail; 373 374 /* All MSIs are unmasked by default; mask them all */ 375 entry = msi_first_desc(&dev->dev, MSI_DESC_ALL); 376 pci_msi_mask(entry, msi_multi_mask(entry)); 377 378 /* Configure MSI capability structure */ 379 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 380 if (ret) 381 goto err; 382 383 ret = msi_verify_entries(dev); 384 if (ret) 385 goto err; 386 387 /* Set MSI enabled bits */ 388 pci_intx_for_msi(dev, 0); 389 pci_msi_set_enable(dev, 1); 390 391 pcibios_free_irq(dev); 392 dev->irq = entry->irq; 393 goto unlock; 394 395 err: 396 pci_msi_unmask(entry, msi_multi_mask(entry)); 397 pci_free_msi_irqs(dev); 398 fail: 399 dev->msi_enabled = 0; 400 unlock: 401 msi_unlock_descs(&dev->dev); 402 kfree(masks); 403 return ret; 404 } 405 406 int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, 407 struct irq_affinity *affd) 408 { 409 int nvec; 410 int rc; 411 412 if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0) 413 return -EINVAL; 414 415 /* Check whether driver already requested MSI-X IRQs */ 416 if (dev->msix_enabled) { 417 pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); 418 return -EINVAL; 419 } 420 421 if (maxvec < minvec) 422 return -ERANGE; 423 424 if (WARN_ON_ONCE(dev->msi_enabled)) 425 return -EINVAL; 426 427 nvec = pci_msi_vec_count(dev); 428 if (nvec < 0) 429 return nvec; 430 if (nvec < minvec) 431 return -ENOSPC; 432 433 if (nvec > maxvec) 434 nvec = maxvec; 435 436 rc = pci_setup_msi_context(dev); 437 if (rc) 438 return rc; 439 440 if (!pci_setup_msi_device_domain(dev)) 441 return -ENODEV; 442 443 for (;;) { 444 if (affd) { 445 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 446 if (nvec < minvec) 447 return -ENOSPC; 448 } 449 450 rc = msi_capability_init(dev, nvec, affd); 451 if (rc == 0) 452 return nvec; 453 454 if (rc < 0) 455 return rc; 456 if (rc < minvec) 457 return -ENOSPC; 458 459 nvec = rc; 460 } 461 } 462 463 /** 464 * pci_msi_vec_count - Return the number of MSI vectors a device can send 465 * @dev: device to report about 466 * 467 * This function returns the number of MSI vectors a device requested via 468 * Multiple Message Capable register. It returns a negative errno if the 469 * device is not capable sending MSI interrupts. Otherwise, the call succeeds 470 * and returns a power of two, up to a maximum of 2^5 (32), according to the 471 * MSI specification. 472 **/ 473 int pci_msi_vec_count(struct pci_dev *dev) 474 { 475 int ret; 476 u16 msgctl; 477 478 if (!dev->msi_cap) 479 return -EINVAL; 480 481 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); 482 ret = 1 << FIELD_GET(PCI_MSI_FLAGS_QMASK, msgctl); 483 484 return ret; 485 } 486 EXPORT_SYMBOL(pci_msi_vec_count); 487 488 /* 489 * Architecture override returns true when the PCI MSI message should be 490 * written by the generic restore function. 491 */ 492 bool __weak arch_restore_msi_irqs(struct pci_dev *dev) 493 { 494 return true; 495 } 496 497 void __pci_restore_msi_state(struct pci_dev *dev) 498 { 499 struct msi_desc *entry; 500 u16 control; 501 502 if (!dev->msi_enabled) 503 return; 504 505 entry = irq_get_msi_desc(dev->irq); 506 507 pci_intx_for_msi(dev, 0); 508 pci_msi_set_enable(dev, 0); 509 if (arch_restore_msi_irqs(dev)) 510 __pci_write_msi_msg(entry, &entry->msg); 511 512 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 513 pci_msi_update_mask(entry, 0, 0); 514 control &= ~PCI_MSI_FLAGS_QSIZE; 515 control |= PCI_MSI_FLAGS_ENABLE | 516 FIELD_PREP(PCI_MSI_FLAGS_QSIZE, entry->pci.msi_attrib.multiple); 517 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 518 } 519 520 void pci_msi_shutdown(struct pci_dev *dev) 521 { 522 struct msi_desc *desc; 523 524 if (!pci_msi_enable || !dev || !dev->msi_enabled) 525 return; 526 527 pci_msi_set_enable(dev, 0); 528 pci_intx_for_msi(dev, 1); 529 dev->msi_enabled = 0; 530 531 /* Return the device with MSI unmasked as initial states */ 532 desc = msi_first_desc(&dev->dev, MSI_DESC_ALL); 533 if (!WARN_ON_ONCE(!desc)) 534 pci_msi_unmask(desc, msi_multi_mask(desc)); 535 536 /* Restore dev->irq to its default pin-assertion IRQ */ 537 dev->irq = desc->pci.msi_attrib.default_irq; 538 pcibios_alloc_irq(dev); 539 } 540 541 /* PCI/MSI-X specific functionality */ 542 543 static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) 544 { 545 u16 ctrl; 546 547 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); 548 ctrl &= ~clear; 549 ctrl |= set; 550 pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); 551 } 552 553 static void __iomem *msix_map_region(struct pci_dev *dev, 554 unsigned int nr_entries) 555 { 556 resource_size_t phys_addr; 557 u32 table_offset; 558 unsigned long flags; 559 u8 bir; 560 561 pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE, 562 &table_offset); 563 bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); 564 flags = pci_resource_flags(dev, bir); 565 if (!flags || (flags & IORESOURCE_UNSET)) 566 return NULL; 567 568 table_offset &= PCI_MSIX_TABLE_OFFSET; 569 phys_addr = pci_resource_start(dev, bir) + table_offset; 570 571 return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE); 572 } 573 574 /** 575 * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation 576 * @dev: The PCI device for which the descriptor is prepared 577 * @desc: The MSI descriptor for preparation 578 * 579 * This is separate from msix_setup_msi_descs() below to handle dynamic 580 * allocations for MSI-X after initial enablement. 581 * 582 * Ideally the whole MSI-X setup would work that way, but there is no way to 583 * support this for the legacy arch_setup_msi_irqs() mechanism and for the 584 * fake irq domains like the x86 XEN one. Sigh... 585 * 586 * The descriptor is zeroed and only @desc::msi_index and @desc::affinity 587 * are set. When called from msix_setup_msi_descs() then the is_virtual 588 * attribute is initialized as well. 589 * 590 * Fill in the rest. 591 */ 592 void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) 593 { 594 desc->nvec_used = 1; 595 desc->pci.msi_attrib.is_msix = 1; 596 desc->pci.msi_attrib.is_64 = 1; 597 desc->pci.msi_attrib.default_irq = dev->irq; 598 desc->pci.mask_base = dev->msix_base; 599 desc->pci.msi_attrib.can_mask = !pci_msi_ignore_mask && 600 !desc->pci.msi_attrib.is_virtual; 601 602 if (desc->pci.msi_attrib.can_mask) { 603 void __iomem *addr = pci_msix_desc_addr(desc); 604 605 desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); 606 } 607 } 608 609 static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries, 610 int nvec, struct irq_affinity_desc *masks) 611 { 612 int ret = 0, i, vec_count = pci_msix_vec_count(dev); 613 struct irq_affinity_desc *curmsk; 614 struct msi_desc desc; 615 616 memset(&desc, 0, sizeof(desc)); 617 618 for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) { 619 desc.msi_index = entries ? entries[i].entry : i; 620 desc.affinity = masks ? curmsk : NULL; 621 desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count; 622 623 msix_prepare_msi_desc(dev, &desc); 624 625 ret = msi_insert_msi_desc(&dev->dev, &desc); 626 if (ret) 627 break; 628 } 629 return ret; 630 } 631 632 static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries) 633 { 634 struct msi_desc *desc; 635 636 if (entries) { 637 msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) { 638 entries->vector = desc->irq; 639 entries++; 640 } 641 } 642 } 643 644 static void msix_mask_all(void __iomem *base, int tsize) 645 { 646 u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; 647 int i; 648 649 if (pci_msi_ignore_mask) 650 return; 651 652 for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) 653 writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); 654 } 655 656 static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, 657 int nvec, struct irq_affinity *affd) 658 { 659 struct irq_affinity_desc *masks = NULL; 660 int ret; 661 662 if (affd) 663 masks = irq_create_affinity_masks(nvec, affd); 664 665 msi_lock_descs(&dev->dev); 666 ret = msix_setup_msi_descs(dev, entries, nvec, masks); 667 if (ret) 668 goto out_free; 669 670 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); 671 if (ret) 672 goto out_free; 673 674 /* Check if all MSI entries honor device restrictions */ 675 ret = msi_verify_entries(dev); 676 if (ret) 677 goto out_free; 678 679 msix_update_entries(dev, entries); 680 goto out_unlock; 681 682 out_free: 683 pci_free_msi_irqs(dev); 684 out_unlock: 685 msi_unlock_descs(&dev->dev); 686 kfree(masks); 687 return ret; 688 } 689 690 /** 691 * msix_capability_init - configure device's MSI-X capability 692 * @dev: pointer to the pci_dev data structure of MSI-X device function 693 * @entries: pointer to an array of struct msix_entry entries 694 * @nvec: number of @entries 695 * @affd: Optional pointer to enable automatic affinity assignment 696 * 697 * Setup the MSI-X capability structure of device function with a 698 * single MSI-X IRQ. A return of zero indicates the successful setup of 699 * requested MSI-X entries with allocated IRQs or non-zero for otherwise. 700 **/ 701 static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, 702 int nvec, struct irq_affinity *affd) 703 { 704 int ret, tsize; 705 u16 control; 706 707 /* 708 * Some devices require MSI-X to be enabled before the MSI-X 709 * registers can be accessed. Mask all the vectors to prevent 710 * interrupts coming in before they're fully set up. 711 */ 712 pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL | 713 PCI_MSIX_FLAGS_ENABLE); 714 715 /* Mark it enabled so setup functions can query it */ 716 dev->msix_enabled = 1; 717 718 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 719 /* Request & Map MSI-X table region */ 720 tsize = msix_table_size(control); 721 dev->msix_base = msix_map_region(dev, tsize); 722 if (!dev->msix_base) { 723 ret = -ENOMEM; 724 goto out_disable; 725 } 726 727 ret = msix_setup_interrupts(dev, entries, nvec, affd); 728 if (ret) 729 goto out_disable; 730 731 /* Disable INTX */ 732 pci_intx_for_msi(dev, 0); 733 734 /* 735 * Ensure that all table entries are masked to prevent 736 * stale entries from firing in a crash kernel. 737 * 738 * Done late to deal with a broken Marvell NVME device 739 * which takes the MSI-X mask bits into account even 740 * when MSI-X is disabled, which prevents MSI delivery. 741 */ 742 msix_mask_all(dev->msix_base, tsize); 743 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 744 745 pcibios_free_irq(dev); 746 return 0; 747 748 out_disable: 749 dev->msix_enabled = 0; 750 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); 751 752 return ret; 753 } 754 755 static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries, int nvec) 756 { 757 bool nogap; 758 int i, j; 759 760 if (!entries) 761 return true; 762 763 nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY); 764 765 for (i = 0; i < nvec; i++) { 766 /* Check for duplicate entries */ 767 for (j = i + 1; j < nvec; j++) { 768 if (entries[i].entry == entries[j].entry) 769 return false; 770 } 771 /* Check for unsupported gaps */ 772 if (nogap && entries[i].entry != i) 773 return false; 774 } 775 return true; 776 } 777 778 int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, 779 int maxvec, struct irq_affinity *affd, int flags) 780 { 781 int hwsize, rc, nvec = maxvec; 782 783 if (maxvec < minvec) 784 return -ERANGE; 785 786 if (dev->msi_enabled) { 787 pci_info(dev, "can't enable MSI-X (MSI already enabled)\n"); 788 return -EINVAL; 789 } 790 791 if (WARN_ON_ONCE(dev->msix_enabled)) 792 return -EINVAL; 793 794 /* Check MSI-X early on irq domain enabled architectures */ 795 if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY)) 796 return -ENOTSUPP; 797 798 if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) 799 return -EINVAL; 800 801 hwsize = pci_msix_vec_count(dev); 802 if (hwsize < 0) 803 return hwsize; 804 805 if (!pci_msix_validate_entries(dev, entries, nvec)) 806 return -EINVAL; 807 808 if (hwsize < nvec) { 809 /* Keep the IRQ virtual hackery working */ 810 if (flags & PCI_IRQ_VIRTUAL) 811 hwsize = nvec; 812 else 813 nvec = hwsize; 814 } 815 816 if (nvec < minvec) 817 return -ENOSPC; 818 819 rc = pci_setup_msi_context(dev); 820 if (rc) 821 return rc; 822 823 if (!pci_setup_msix_device_domain(dev, hwsize)) 824 return -ENODEV; 825 826 for (;;) { 827 if (affd) { 828 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 829 if (nvec < minvec) 830 return -ENOSPC; 831 } 832 833 rc = msix_capability_init(dev, entries, nvec, affd); 834 if (rc == 0) 835 return nvec; 836 837 if (rc < 0) 838 return rc; 839 if (rc < minvec) 840 return -ENOSPC; 841 842 nvec = rc; 843 } 844 } 845 846 void __pci_restore_msix_state(struct pci_dev *dev) 847 { 848 struct msi_desc *entry; 849 bool write_msg; 850 851 if (!dev->msix_enabled) 852 return; 853 854 /* route the table */ 855 pci_intx_for_msi(dev, 0); 856 pci_msix_clear_and_set_ctrl(dev, 0, 857 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); 858 859 write_msg = arch_restore_msi_irqs(dev); 860 861 msi_lock_descs(&dev->dev); 862 msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { 863 if (write_msg) 864 __pci_write_msi_msg(entry, &entry->msg); 865 pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); 866 } 867 msi_unlock_descs(&dev->dev); 868 869 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 870 } 871 872 void pci_msix_shutdown(struct pci_dev *dev) 873 { 874 struct msi_desc *desc; 875 876 if (!pci_msi_enable || !dev || !dev->msix_enabled) 877 return; 878 879 if (pci_dev_is_disconnected(dev)) { 880 dev->msix_enabled = 0; 881 return; 882 } 883 884 /* Return the device with MSI-X masked as initial states */ 885 msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) 886 pci_msix_mask(desc); 887 888 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 889 pci_intx_for_msi(dev, 1); 890 dev->msix_enabled = 0; 891 pcibios_alloc_irq(dev); 892 } 893 894 /* Common interfaces */ 895 896 void pci_free_msi_irqs(struct pci_dev *dev) 897 { 898 pci_msi_teardown_msi_irqs(dev); 899 900 if (dev->msix_base) { 901 iounmap(dev->msix_base); 902 dev->msix_base = NULL; 903 } 904 } 905 906 /* Misc. infrastructure */ 907 908 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) 909 { 910 return to_pci_dev(desc->dev); 911 } 912 EXPORT_SYMBOL(msi_desc_to_pci_dev); 913 914 void pci_no_msi(void) 915 { 916 pci_msi_enable = 0; 917 } 918