1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * PCI Message Signaled Interrupt (MSI) 4 * 5 * Copyright (C) 2003-2004 Intel 6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) 7 * Copyright (C) 2016 Christoph Hellwig. 8 */ 9 #include <linux/bitfield.h> 10 #include <linux/err.h> 11 #include <linux/export.h> 12 #include <linux/irq.h> 13 #include <linux/irqdomain.h> 14 15 #include "../pci.h" 16 #include "msi.h" 17 18 bool pci_msi_enable = true; 19 20 /** 21 * pci_msi_supported - check whether MSI may be enabled on a device 22 * @dev: pointer to the pci_dev data structure of MSI device function 23 * @nvec: how many MSIs have been requested? 24 * 25 * Look at global flags, the device itself, and its parent buses 26 * to determine if MSI/-X are supported for the device. If MSI/-X is 27 * supported return 1, else return 0. 28 **/ 29 static int pci_msi_supported(struct pci_dev *dev, int nvec) 30 { 31 struct pci_bus *bus; 32 33 /* MSI must be globally enabled and supported by the device */ 34 if (!pci_msi_enable) 35 return 0; 36 37 if (!dev || dev->no_msi) 38 return 0; 39 40 /* 41 * You can't ask to have 0 or less MSIs configured. 42 * a) it's stupid .. 43 * b) the list manipulation code assumes nvec >= 1. 44 */ 45 if (nvec < 1) 46 return 0; 47 48 /* 49 * Any bridge which does NOT route MSI transactions from its 50 * secondary bus to its primary bus must set NO_MSI flag on 51 * the secondary pci_bus. 52 * 53 * The NO_MSI flag can either be set directly by: 54 * - arch-specific PCI host bus controller drivers (deprecated) 55 * - quirks for specific PCI bridges 56 * 57 * or indirectly by platform-specific PCI host bridge drivers by 58 * advertising the 'msi_domain' property, which results in 59 * the NO_MSI flag when no MSI domain is found for this bridge 60 * at probe time. 61 */ 62 for (bus = dev->bus; bus; bus = bus->parent) 63 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) 64 return 0; 65 66 return 1; 67 } 68 69 static void pcim_msi_release(void *pcidev) 70 { 71 struct pci_dev *dev = pcidev; 72 73 dev->is_msi_managed = false; 74 pci_free_irq_vectors(dev); 75 } 76 77 /* 78 * Needs to be separate from pcim_release to prevent an ordering problem 79 * vs. msi_device_data_release() in the MSI core code. 80 */ 81 static int pcim_setup_msi_release(struct pci_dev *dev) 82 { 83 int ret; 84 85 if (!pci_is_managed(dev) || dev->is_msi_managed) 86 return 0; 87 88 ret = devm_add_action(&dev->dev, pcim_msi_release, dev); 89 if (ret) 90 return ret; 91 92 dev->is_msi_managed = true; 93 return 0; 94 } 95 96 /* 97 * Ordering vs. devres: msi device data has to be installed first so that 98 * pcim_msi_release() is invoked before it on device release. 99 */ 100 static int pci_setup_msi_context(struct pci_dev *dev) 101 { 102 int ret = msi_setup_device_data(&dev->dev); 103 104 if (ret) 105 return ret; 106 107 return pcim_setup_msi_release(dev); 108 } 109 110 /* 111 * Helper functions for mask/unmask and MSI message handling 112 */ 113 114 void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) 115 { 116 struct pci_dev *dev = msi_desc_to_pci_dev(desc); 117 raw_spinlock_t *lock = &dev->msi_lock; 118 unsigned long flags; 119 120 if (!desc->pci.msi_attrib.can_mask) 121 return; 122 123 raw_spin_lock_irqsave(lock, flags); 124 desc->pci.msi_mask &= ~clear; 125 desc->pci.msi_mask |= set; 126 pci_write_config_dword(dev, desc->pci.mask_pos, desc->pci.msi_mask); 127 raw_spin_unlock_irqrestore(lock, flags); 128 } 129 130 /** 131 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts 132 * @data: pointer to irqdata associated to that interrupt 133 */ 134 void pci_msi_mask_irq(struct irq_data *data) 135 { 136 struct msi_desc *desc = irq_data_get_msi_desc(data); 137 138 __pci_msi_mask_desc(desc, BIT(data->irq - desc->irq)); 139 } 140 EXPORT_SYMBOL_GPL(pci_msi_mask_irq); 141 142 /** 143 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts 144 * @data: pointer to irqdata associated to that interrupt 145 */ 146 void pci_msi_unmask_irq(struct irq_data *data) 147 { 148 struct msi_desc *desc = irq_data_get_msi_desc(data); 149 150 __pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq)); 151 } 152 EXPORT_SYMBOL_GPL(pci_msi_unmask_irq); 153 154 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 155 { 156 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 157 158 BUG_ON(dev->current_state != PCI_D0); 159 160 if (entry->pci.msi_attrib.is_msix) { 161 void __iomem *base = pci_msix_desc_addr(entry); 162 163 if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual)) 164 return; 165 166 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); 167 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); 168 msg->data = readl(base + PCI_MSIX_ENTRY_DATA); 169 } else { 170 int pos = dev->msi_cap; 171 u16 data; 172 173 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 174 &msg->address_lo); 175 if (entry->pci.msi_attrib.is_64) { 176 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 177 &msg->address_hi); 178 pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data); 179 } else { 180 msg->address_hi = 0; 181 pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data); 182 } 183 msg->data = data; 184 } 185 } 186 187 static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc, 188 struct msi_msg *msg) 189 { 190 int pos = dev->msi_cap; 191 u16 msgctl; 192 193 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 194 msgctl &= ~PCI_MSI_FLAGS_QSIZE; 195 msgctl |= FIELD_PREP(PCI_MSI_FLAGS_QSIZE, desc->pci.msi_attrib.multiple); 196 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); 197 198 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo); 199 if (desc->pci.msi_attrib.is_64) { 200 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, msg->address_hi); 201 pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data); 202 } else { 203 pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data); 204 } 205 /* Ensure that the writes are visible in the device */ 206 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 207 } 208 209 static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg) 210 { 211 void __iomem *base = pci_msix_desc_addr(desc); 212 u32 ctrl = desc->pci.msix_ctrl; 213 bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT); 214 215 if (desc->pci.msi_attrib.is_virtual) 216 return; 217 /* 218 * The specification mandates that the entry is masked 219 * when the message is modified: 220 * 221 * "If software changes the Address or Data value of an 222 * entry while the entry is unmasked, the result is 223 * undefined." 224 */ 225 if (unmasked) 226 pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT); 227 228 writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); 229 writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); 230 writel(msg->data, base + PCI_MSIX_ENTRY_DATA); 231 232 if (unmasked) 233 pci_msix_write_vector_ctrl(desc, ctrl); 234 235 /* Ensure that the writes are visible in the device */ 236 readl(base + PCI_MSIX_ENTRY_DATA); 237 } 238 239 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 240 { 241 struct pci_dev *dev = msi_desc_to_pci_dev(entry); 242 243 if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) { 244 /* Don't touch the hardware now */ 245 } else if (entry->pci.msi_attrib.is_msix) { 246 pci_write_msg_msix(entry, msg); 247 } else { 248 pci_write_msg_msi(dev, entry, msg); 249 } 250 251 entry->msg = *msg; 252 253 if (entry->write_msi_msg) 254 entry->write_msi_msg(entry, entry->write_msi_msg_data); 255 } 256 257 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) 258 { 259 struct msi_desc *entry = irq_get_msi_desc(irq); 260 261 __pci_write_msi_msg(entry, msg); 262 } 263 EXPORT_SYMBOL_GPL(pci_write_msi_msg); 264 265 266 /* PCI/MSI specific functionality */ 267 268 static void pci_intx_for_msi(struct pci_dev *dev, int enable) 269 { 270 if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) 271 pci_intx(dev, enable); 272 } 273 274 static void pci_msi_set_enable(struct pci_dev *dev, int enable) 275 { 276 u16 control; 277 278 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 279 control &= ~PCI_MSI_FLAGS_ENABLE; 280 if (enable) 281 control |= PCI_MSI_FLAGS_ENABLE; 282 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 283 } 284 285 static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, 286 struct irq_affinity_desc *masks) 287 { 288 struct msi_desc desc; 289 u16 control; 290 291 /* MSI Entry Initialization */ 292 memset(&desc, 0, sizeof(desc)); 293 294 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 295 /* Lies, damned lies, and MSIs */ 296 if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) 297 control |= PCI_MSI_FLAGS_MASKBIT; 298 if (pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY)) 299 control &= ~PCI_MSI_FLAGS_MASKBIT; 300 301 desc.nvec_used = nvec; 302 desc.pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); 303 desc.pci.msi_attrib.can_mask = !!(control & PCI_MSI_FLAGS_MASKBIT); 304 desc.pci.msi_attrib.default_irq = dev->irq; 305 desc.pci.msi_attrib.multi_cap = FIELD_GET(PCI_MSI_FLAGS_QMASK, control); 306 desc.pci.msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); 307 desc.affinity = masks; 308 309 if (control & PCI_MSI_FLAGS_64BIT) 310 desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64; 311 else 312 desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32; 313 314 /* Save the initial mask status */ 315 if (desc.pci.msi_attrib.can_mask) 316 pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask); 317 318 return msi_insert_msi_desc(&dev->dev, &desc); 319 } 320 321 static int msi_verify_entries(struct pci_dev *dev) 322 { 323 struct msi_desc *entry; 324 u64 address; 325 326 if (dev->msi_addr_mask == DMA_BIT_MASK(64)) 327 return 0; 328 329 msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { 330 address = (u64)entry->msg.address_hi << 32 | entry->msg.address_lo; 331 if (address & ~dev->msi_addr_mask) { 332 pci_err(dev, "arch assigned 64-bit MSI address %#llx above device MSI address mask %#llx\n", 333 address, dev->msi_addr_mask); 334 break; 335 } 336 } 337 return !entry ? 0 : -EIO; 338 } 339 340 static int __msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks) 341 { 342 int ret = msi_setup_msi_desc(dev, nvec, masks); 343 struct msi_desc *entry, desc; 344 345 if (ret) 346 return ret; 347 348 /* All MSIs are unmasked by default; mask them all */ 349 entry = msi_first_desc(&dev->dev, MSI_DESC_ALL); 350 pci_msi_mask(entry, msi_multi_mask(entry)); 351 /* 352 * Copy the MSI descriptor for the error path because 353 * pci_msi_setup_msi_irqs() will free it for the hierarchical 354 * interrupt domain case. 355 */ 356 memcpy(&desc, entry, sizeof(desc)); 357 358 /* Configure MSI capability structure */ 359 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 360 if (ret) 361 goto err; 362 363 ret = msi_verify_entries(dev); 364 if (ret) 365 goto err; 366 367 /* Set MSI enabled bits */ 368 dev->msi_enabled = 1; 369 pci_intx_for_msi(dev, 0); 370 pci_msi_set_enable(dev, 1); 371 372 pcibios_free_irq(dev); 373 dev->irq = entry->irq; 374 return 0; 375 err: 376 pci_msi_unmask(&desc, msi_multi_mask(&desc)); 377 pci_free_msi_irqs(dev); 378 return ret; 379 } 380 381 /** 382 * msi_capability_init - configure device's MSI capability structure 383 * @dev: pointer to the pci_dev data structure of MSI device function 384 * @nvec: number of interrupts to allocate 385 * @affd: description of automatic IRQ affinity assignments (may be %NULL) 386 * 387 * Setup the MSI capability structure of the device with the requested 388 * number of interrupts. A return value of zero indicates the successful 389 * setup of an entry with the new MSI IRQ. A negative return value indicates 390 * an error, and a positive return value indicates the number of interrupts 391 * which could have been allocated. 392 */ 393 static int msi_capability_init(struct pci_dev *dev, int nvec, 394 struct irq_affinity *affd) 395 { 396 /* Reject multi-MSI early on irq domain enabled architectures */ 397 if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) 398 return 1; 399 400 /* 401 * Disable MSI during setup in the hardware, but mark it enabled 402 * so that setup code can evaluate it. 403 */ 404 pci_msi_set_enable(dev, 0); 405 406 struct irq_affinity_desc *masks __free(kfree) = 407 affd ? irq_create_affinity_masks(nvec, affd) : NULL; 408 409 guard(msi_descs_lock)(&dev->dev); 410 return __msi_capability_init(dev, nvec, masks); 411 } 412 413 int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, 414 struct irq_affinity *affd) 415 { 416 int nvec; 417 int rc; 418 419 if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0) 420 return -EINVAL; 421 422 /* Check whether driver already requested MSI-X IRQs */ 423 if (dev->msix_enabled) { 424 pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); 425 return -EINVAL; 426 } 427 428 if (maxvec < minvec) 429 return -ERANGE; 430 431 if (WARN_ON_ONCE(dev->msi_enabled)) 432 return -EINVAL; 433 434 /* Test for the availability of MSI support */ 435 if (!pci_msi_domain_supports(dev, 0, ALLOW_LEGACY)) 436 return -ENOTSUPP; 437 438 nvec = pci_msi_vec_count(dev); 439 if (nvec < 0) 440 return nvec; 441 if (nvec < minvec) 442 return -ENOSPC; 443 444 rc = pci_setup_msi_context(dev); 445 if (rc) 446 return rc; 447 448 if (!pci_setup_msi_device_domain(dev, nvec)) 449 return -ENODEV; 450 451 if (nvec > maxvec) 452 nvec = maxvec; 453 454 for (;;) { 455 if (affd) { 456 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 457 if (nvec < minvec) 458 return -ENOSPC; 459 } 460 461 rc = msi_capability_init(dev, nvec, affd); 462 if (rc == 0) 463 return nvec; 464 465 if (rc < 0) 466 return rc; 467 if (rc < minvec) 468 return -ENOSPC; 469 470 nvec = rc; 471 } 472 } 473 474 /** 475 * pci_msi_vec_count - Return the number of MSI vectors a device can send 476 * @dev: device to report about 477 * 478 * This function returns the number of MSI vectors a device requested via 479 * Multiple Message Capable register. It returns a negative errno if the 480 * device is not capable sending MSI interrupts. Otherwise, the call succeeds 481 * and returns a power of two, up to a maximum of 2^5 (32), according to the 482 * MSI specification. 483 **/ 484 int pci_msi_vec_count(struct pci_dev *dev) 485 { 486 int ret; 487 u16 msgctl; 488 489 if (!dev->msi_cap) 490 return -EINVAL; 491 492 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); 493 ret = 1 << FIELD_GET(PCI_MSI_FLAGS_QMASK, msgctl); 494 495 return ret; 496 } 497 EXPORT_SYMBOL(pci_msi_vec_count); 498 499 /* 500 * Architecture override returns true when the PCI MSI message should be 501 * written by the generic restore function. 502 */ 503 bool __weak arch_restore_msi_irqs(struct pci_dev *dev) 504 { 505 return true; 506 } 507 508 void __pci_restore_msi_state(struct pci_dev *dev) 509 { 510 struct msi_desc *entry; 511 u16 control; 512 513 if (!dev->msi_enabled) 514 return; 515 516 entry = irq_get_msi_desc(dev->irq); 517 518 pci_intx_for_msi(dev, 0); 519 pci_msi_set_enable(dev, 0); 520 if (arch_restore_msi_irqs(dev)) 521 __pci_write_msi_msg(entry, &entry->msg); 522 523 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 524 pci_msi_update_mask(entry, 0, 0); 525 control &= ~PCI_MSI_FLAGS_QSIZE; 526 control |= PCI_MSI_FLAGS_ENABLE | 527 FIELD_PREP(PCI_MSI_FLAGS_QSIZE, entry->pci.msi_attrib.multiple); 528 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 529 } 530 531 void pci_msi_shutdown(struct pci_dev *dev) 532 { 533 struct msi_desc *desc; 534 535 if (!pci_msi_enable || !dev || !dev->msi_enabled) 536 return; 537 538 pci_msi_set_enable(dev, 0); 539 pci_intx_for_msi(dev, 1); 540 dev->msi_enabled = 0; 541 542 /* Return the device with MSI unmasked as initial states */ 543 desc = msi_first_desc(&dev->dev, MSI_DESC_ALL); 544 if (!WARN_ON_ONCE(!desc)) 545 pci_msi_unmask(desc, msi_multi_mask(desc)); 546 547 /* Restore dev->irq to its default pin-assertion IRQ */ 548 dev->irq = desc->pci.msi_attrib.default_irq; 549 pcibios_alloc_irq(dev); 550 } 551 552 /* PCI/MSI-X specific functionality */ 553 554 static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) 555 { 556 u16 ctrl; 557 558 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); 559 ctrl &= ~clear; 560 ctrl |= set; 561 pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); 562 } 563 564 static void __iomem *msix_map_region(struct pci_dev *dev, 565 unsigned int nr_entries) 566 { 567 resource_size_t phys_addr; 568 u32 table_offset; 569 unsigned long flags; 570 u8 bir; 571 572 pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE, 573 &table_offset); 574 bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); 575 flags = pci_resource_flags(dev, bir); 576 if (!flags || (flags & IORESOURCE_UNSET)) 577 return NULL; 578 579 table_offset &= PCI_MSIX_TABLE_OFFSET; 580 phys_addr = pci_resource_start(dev, bir) + table_offset; 581 582 return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE); 583 } 584 585 /** 586 * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation 587 * @dev: The PCI device for which the descriptor is prepared 588 * @desc: The MSI descriptor for preparation 589 * 590 * This is separate from msix_setup_msi_descs() below to handle dynamic 591 * allocations for MSI-X after initial enablement. 592 * 593 * Ideally the whole MSI-X setup would work that way, but there is no way to 594 * support this for the legacy arch_setup_msi_irqs() mechanism and for the 595 * fake irq domains like the x86 XEN one. Sigh... 596 * 597 * The descriptor is zeroed and only @desc::msi_index and @desc::affinity 598 * are set. When called from msix_setup_msi_descs() then the is_virtual 599 * attribute is initialized as well. 600 * 601 * Fill in the rest. 602 */ 603 void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) 604 { 605 desc->nvec_used = 1; 606 desc->pci.msi_attrib.is_msix = 1; 607 desc->pci.msi_attrib.is_64 = 1; 608 desc->pci.msi_attrib.default_irq = dev->irq; 609 desc->pci.mask_base = dev->msix_base; 610 611 612 if (!pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY) && 613 !desc->pci.msi_attrib.is_virtual) { 614 void __iomem *addr = pci_msix_desc_addr(desc); 615 616 desc->pci.msi_attrib.can_mask = 1; 617 /* Workaround for SUN NIU insanity, which requires write before read */ 618 if (dev->dev_flags & PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST) 619 writel(0, addr + PCI_MSIX_ENTRY_DATA); 620 desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); 621 } 622 } 623 624 static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries, 625 int nvec, struct irq_affinity_desc *masks) 626 { 627 int ret = 0, i, vec_count = pci_msix_vec_count(dev); 628 struct irq_affinity_desc *curmsk; 629 struct msi_desc desc; 630 631 memset(&desc, 0, sizeof(desc)); 632 633 for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) { 634 desc.msi_index = entries ? entries[i].entry : i; 635 desc.affinity = masks ? curmsk : NULL; 636 desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count; 637 638 msix_prepare_msi_desc(dev, &desc); 639 640 ret = msi_insert_msi_desc(&dev->dev, &desc); 641 if (ret) 642 break; 643 } 644 return ret; 645 } 646 647 static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries) 648 { 649 struct msi_desc *desc; 650 651 if (entries) { 652 msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) { 653 entries->vector = desc->irq; 654 entries++; 655 } 656 } 657 } 658 659 static void msix_mask_all(void __iomem *base, int tsize) 660 { 661 u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; 662 int i; 663 664 for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) 665 writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); 666 } 667 668 DEFINE_FREE(free_msi_irqs, struct pci_dev *, if (_T) pci_free_msi_irqs(_T)); 669 670 static int __msix_setup_interrupts(struct pci_dev *__dev, struct msix_entry *entries, 671 int nvec, struct irq_affinity_desc *masks) 672 { 673 struct pci_dev *dev __free(free_msi_irqs) = __dev; 674 675 int ret = msix_setup_msi_descs(dev, entries, nvec, masks); 676 if (ret) 677 return ret; 678 679 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); 680 if (ret) 681 return ret; 682 683 /* Check if all MSI entries honor device restrictions */ 684 ret = msi_verify_entries(dev); 685 if (ret) 686 return ret; 687 688 msix_update_entries(dev, entries); 689 retain_and_null_ptr(dev); 690 return 0; 691 } 692 693 static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, 694 int nvec, struct irq_affinity *affd) 695 { 696 struct irq_affinity_desc *masks __free(kfree) = 697 affd ? irq_create_affinity_masks(nvec, affd) : NULL; 698 699 guard(msi_descs_lock)(&dev->dev); 700 return __msix_setup_interrupts(dev, entries, nvec, masks); 701 } 702 703 /** 704 * msix_capability_init - configure device's MSI-X capability 705 * @dev: pointer to the pci_dev data structure of MSI-X device function 706 * @entries: pointer to an array of struct msix_entry entries 707 * @nvec: number of @entries 708 * @affd: Optional pointer to enable automatic affinity assignment 709 * 710 * Setup the MSI-X capability structure of device function with a 711 * single MSI-X IRQ. A return of zero indicates the successful setup of 712 * requested MSI-X entries with allocated IRQs or non-zero for otherwise. 713 **/ 714 static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, 715 int nvec, struct irq_affinity *affd) 716 { 717 int ret, tsize; 718 u16 control; 719 720 /* 721 * Some devices require MSI-X to be enabled before the MSI-X 722 * registers can be accessed. Mask all the vectors to prevent 723 * interrupts coming in before they're fully set up. 724 */ 725 pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL | 726 PCI_MSIX_FLAGS_ENABLE); 727 728 /* Mark it enabled so setup functions can query it */ 729 dev->msix_enabled = 1; 730 731 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 732 /* Request & Map MSI-X table region */ 733 tsize = msix_table_size(control); 734 dev->msix_base = msix_map_region(dev, tsize); 735 if (!dev->msix_base) { 736 ret = -ENOMEM; 737 goto out_disable; 738 } 739 740 ret = msix_setup_interrupts(dev, entries, nvec, affd); 741 if (ret) 742 goto out_unmap; 743 744 /* Disable INTX */ 745 pci_intx_for_msi(dev, 0); 746 747 if (!pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY)) { 748 /* 749 * Ensure that all table entries are masked to prevent 750 * stale entries from firing in a crash kernel. 751 * 752 * Done late to deal with a broken Marvell NVME device 753 * which takes the MSI-X mask bits into account even 754 * when MSI-X is disabled, which prevents MSI delivery. 755 */ 756 msix_mask_all(dev->msix_base, tsize); 757 } 758 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 759 760 pcibios_free_irq(dev); 761 return 0; 762 763 out_unmap: 764 iounmap(dev->msix_base); 765 out_disable: 766 dev->msix_enabled = 0; 767 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); 768 769 return ret; 770 } 771 772 static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries, int nvec) 773 { 774 bool nogap; 775 int i, j; 776 777 if (!entries) 778 return true; 779 780 nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY); 781 782 for (i = 0; i < nvec; i++) { 783 /* Check for duplicate entries */ 784 for (j = i + 1; j < nvec; j++) { 785 if (entries[i].entry == entries[j].entry) 786 return false; 787 } 788 /* Check for unsupported gaps */ 789 if (nogap && entries[i].entry != i) 790 return false; 791 } 792 return true; 793 } 794 795 int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, 796 int maxvec, struct irq_affinity *affd, int flags) 797 { 798 int hwsize, rc, nvec = maxvec; 799 800 if (maxvec < minvec) 801 return -ERANGE; 802 803 if (dev->msi_enabled) { 804 pci_info(dev, "can't enable MSI-X (MSI already enabled)\n"); 805 return -EINVAL; 806 } 807 808 if (WARN_ON_ONCE(dev->msix_enabled)) 809 return -EINVAL; 810 811 /* Check MSI-X early on irq domain enabled architectures */ 812 if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY)) 813 return -ENOTSUPP; 814 815 if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) 816 return -EINVAL; 817 818 hwsize = pci_msix_vec_count(dev); 819 if (hwsize < 0) 820 return hwsize; 821 822 if (!pci_msix_validate_entries(dev, entries, nvec)) 823 return -EINVAL; 824 825 if (hwsize < nvec) { 826 /* Keep the IRQ virtual hackery working */ 827 if (flags & PCI_IRQ_VIRTUAL) 828 hwsize = nvec; 829 else 830 nvec = hwsize; 831 } 832 833 if (nvec < minvec) 834 return -ENOSPC; 835 836 rc = pci_setup_msi_context(dev); 837 if (rc) 838 return rc; 839 840 if (!pci_setup_msix_device_domain(dev, hwsize)) 841 return -ENODEV; 842 843 for (;;) { 844 if (affd) { 845 nvec = irq_calc_affinity_vectors(minvec, nvec, affd); 846 if (nvec < minvec) 847 return -ENOSPC; 848 } 849 850 rc = msix_capability_init(dev, entries, nvec, affd); 851 if (rc == 0) 852 return nvec; 853 854 if (rc < 0) 855 return rc; 856 if (rc < minvec) 857 return -ENOSPC; 858 859 nvec = rc; 860 } 861 } 862 863 void __pci_restore_msix_state(struct pci_dev *dev) 864 { 865 struct msi_desc *entry; 866 bool write_msg; 867 868 if (!dev->msix_enabled) 869 return; 870 871 /* route the table */ 872 pci_intx_for_msi(dev, 0); 873 pci_msix_clear_and_set_ctrl(dev, 0, 874 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); 875 876 write_msg = arch_restore_msi_irqs(dev); 877 878 scoped_guard (msi_descs_lock, &dev->dev) { 879 msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { 880 if (write_msg) 881 __pci_write_msi_msg(entry, &entry->msg); 882 pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); 883 } 884 } 885 886 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 887 } 888 889 void pci_msix_shutdown(struct pci_dev *dev) 890 { 891 struct msi_desc *desc; 892 893 if (!pci_msi_enable || !dev || !dev->msix_enabled) 894 return; 895 896 if (pci_dev_is_disconnected(dev)) { 897 dev->msix_enabled = 0; 898 return; 899 } 900 901 /* Return the device with MSI-X masked as initial states */ 902 msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) 903 pci_msix_mask(desc); 904 905 pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 906 pci_intx_for_msi(dev, 1); 907 dev->msix_enabled = 0; 908 pcibios_alloc_irq(dev); 909 } 910 911 /* Common interfaces */ 912 913 void pci_free_msi_irqs(struct pci_dev *dev) 914 { 915 pci_msi_teardown_msi_irqs(dev); 916 917 if (dev->msix_base) { 918 iounmap(dev->msix_base); 919 dev->msix_base = NULL; 920 } 921 } 922 923 #ifdef CONFIG_PCIE_TPH 924 /** 925 * pci_msix_write_tph_tag - Update the TPH tag for a given MSI-X vector 926 * @pdev: The PCIe device to update 927 * @index: The MSI-X index to update 928 * @tag: The tag to write 929 * 930 * Returns: 0 on success, error code on failure 931 */ 932 int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) 933 { 934 struct msi_desc *msi_desc; 935 struct irq_desc *irq_desc; 936 unsigned int virq; 937 938 if (!pdev->msix_enabled) 939 return -ENXIO; 940 941 virq = msi_get_virq(&pdev->dev, index); 942 if (!virq) 943 return -ENXIO; 944 945 guard(msi_descs_lock)(&pdev->dev); 946 947 /* 948 * This is a horrible hack, but short of implementing a PCI 949 * specific interrupt chip callback and a huge pile of 950 * infrastructure, this is the minor nuisance. It provides the 951 * protection against concurrent operations on this entry and keeps 952 * the control word cache in sync. 953 */ 954 irq_desc = irq_to_desc(virq); 955 if (!irq_desc) 956 return -ENXIO; 957 958 guard(raw_spinlock_irq)(&irq_desc->lock); 959 msi_desc = irq_data_get_msi_desc(&irq_desc->irq_data); 960 if (!msi_desc || msi_desc->pci.msi_attrib.is_virtual) 961 return -ENXIO; 962 963 msi_desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_ST; 964 msi_desc->pci.msix_ctrl |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); 965 pci_msix_write_vector_ctrl(msi_desc, msi_desc->pci.msix_ctrl); 966 /* Flush the write */ 967 readl(pci_msix_desc_addr(msi_desc)); 968 return 0; 969 } 970 #endif 971 972 /* Misc. infrastructure */ 973 974 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) 975 { 976 return to_pci_dev(desc->dev); 977 } 978 EXPORT_SYMBOL(msi_desc_to_pci_dev); 979 980 void pci_no_msi(void) 981 { 982 pci_msi_enable = false; 983 } 984