1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * apic_introp.c: 28 * Has code for Advanced DDI interrupt framework support. 
 */

#include <sys/cpuvar.h>
#include <sys/psm.h>
#include <sys/archsystm.h>
#include <sys/apic.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/mach_intr.h>
#include <sys/sysmacros.h>
#include <sys/trap.h>
#include <sys/pci.h>
#include <sys/pci_intr_lib.h>

/* Autovector table from the common interrupt layer, indexed by irq. */
extern struct av_head autovect[];

/*
 * Local Function Prototypes
 */
apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int);

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *	0 = default value - don't know and need to call apic_check_msi_support()
 *	    to find out then set it accordingly
 *	1 = supported
 *	-1 = not supported
 */
int	apic_support_msi = 0;

/* Multiple vector support for MSI */
#if !defined(__xpv)
int	apic_multi_msi_enable = 1;
#else
/*
 * Xen hypervisor does not seem to properly support multi-MSI
 */
int	apic_multi_msi_enable = 0;
#endif	/* __xpv */

/* Multiple vector support for MSI-X */
int	apic_msix_enable = 1;

/*
 * apic_pci_msi_enable_vector:
 *	Set the address/data fields in the MSI/X capability structure
 *	XXX: MSI-X support
 *
 * Programs the MSI address/data pair for the device behind irq_ptr so the
 * interrupt is delivered with "vector" to "target_apic_id".  For multi-MSI
 * (type == DDI_INTR_TYPE_MSI, count > 1) it also records the number of
 * enabled vectors in the Multiple Message Enable (MME) field of the MSI
 * control register.  Called with the interrupt setup already serialized by
 * the caller; writes device PCI config space / MSI-X table directly.
 */
/* ARGSUSED */
void
apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
    int count, int target_apic_id)
{
	uint64_t		msi_addr, msi_data;
	ushort_t		msi_ctrl;
	dev_info_t		*dip = irq_ptr->airq_dip;
	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
#if !defined(__xpv)
	msi_regs_t		msi_regs;
#endif	/* ! __xpv */

	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
	    ddi_driver_name(dip), inum, vector, target_apic_id));

	ASSERT((handle != NULL) && (cap_ptr != 0));

#if !defined(__xpv)
	/*
	 * Bare metal: let the interrupt-remapping / virtualization ops
	 * (apic_vt_ops) compute and record the effective MSI address and
	 * data for this irq; the values come back in msi_regs.
	 */
	msi_regs.mr_data = vector;
	msi_regs.mr_addr = target_apic_id;

	apic_vt_ops->apic_intrr_alloc_entry(irq_ptr);
	apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&msi_regs);
	apic_vt_ops->apic_intrr_record_msi(irq_ptr, &msi_regs);

	/* MSI Address */
	msi_addr = msi_regs.mr_addr;

	/* MSI Data: MSI is edge triggered according to spec */
	msi_data = msi_regs.mr_data;
#else
	/* MSI Address: fixed delivery, physical destination mode */
	msi_addr = (MSI_ADDR_HDR |
	    (target_apic_id << MSI_ADDR_DEST_SHIFT));
	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));

	/* MSI Data: MSI is edge triggered according to spec */
	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
#endif	/* ! __xpv */

	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));

	if (type == DDI_INTR_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);

		/*
		 * Set the bits to inform how many MSIs are enabled.
		 * NOTE(review): the MME field is OR'd in without first
		 * clearing PCI_MSI_MME_MASK — presumably the field is zero
		 * here (cleared by reset or apic_pci_msi_unconfigure());
		 * verify before reusing this path on an already-configured
		 * device.
		 */
		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

#if !defined(__xpv)
		/*
		 * Only set vector if not on hypervisor.  Note the #endif
		 * below: under __xpv the address/data writes AND the whole
		 * MSI-X branch are compiled out, leaving only the MME
		 * update above.
		 */
		pci_config_put32(handle,
		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);

		if (msi_ctrl & PCI_MSI_64BIT_MASK) {
			/* 64-bit capable: high address dword, then data */
			pci_config_put32(handle,
			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
		} else {
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
		}

	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t	off;
		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);

		/* Offset into the "inum"th entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (inum * PCI_MSIX_VECTOR_SIZE);

		ddi_put32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
		ddi_put64(msix_p->msix_tbl_hdl,
		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
#endif	/* ! __xpv */
	}
}


#if !defined(__xpv)

/*
 * This function returns the no. of vectors available for the pri.
 * dip is not used at this moment. If we really don't need that,
 * it will be removed.
174 */ 175 /*ARGSUSED*/ 176 int 177 apic_navail_vector(dev_info_t *dip, int pri) 178 { 179 int lowest, highest, i, navail, count; 180 181 DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n", 182 (void *)dip, pri)); 183 184 highest = apic_ipltopri[pri] + APIC_VECTOR_MASK; 185 lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL; 186 navail = count = 0; 187 188 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 189 lowest -= APIC_VECTOR_PER_IPL; 190 191 /* It has to be contiguous */ 192 for (i = lowest; i < highest; i++) { 193 count = 0; 194 while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) && 195 (i < highest)) { 196 if (APIC_CHECK_RESERVE_VECTORS(i)) 197 break; 198 count++; 199 i++; 200 } 201 if (count > navail) 202 navail = count; 203 } 204 return (navail); 205 } 206 207 #endif /* ! __xpv */ 208 209 /* 210 * Finds "count" contiguous MSI vectors starting at the proper alignment 211 * at "pri". 212 * Caller needs to make sure that count has to be power of 2 and should not 213 * be < 1. 214 */ 215 uchar_t 216 apic_find_multi_vectors(int pri, int count) 217 { 218 int lowest, highest, i, navail, start, msibits; 219 220 DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n", 221 pri, count)); 222 223 highest = apic_ipltopri[pri] + APIC_VECTOR_MASK; 224 lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL; 225 navail = 0; 226 227 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 228 lowest -= APIC_VECTOR_PER_IPL; 229 230 /* 231 * msibits is the no. 
of lower order message data bits for the 232 * allocated MSI vectors and is used to calculate the aligned 233 * starting vector 234 */ 235 msibits = count - 1; 236 237 /* It has to be contiguous */ 238 for (i = lowest; i < highest; i++) { 239 navail = 0; 240 241 /* 242 * starting vector has to be aligned accordingly for 243 * multiple MSIs 244 */ 245 if (msibits) 246 i = (i + msibits) & ~msibits; 247 start = i; 248 while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) && 249 (i < highest)) { 250 if (APIC_CHECK_RESERVE_VECTORS(i)) 251 break; 252 navail++; 253 if (navail >= count) 254 return (start); 255 i++; 256 } 257 } 258 return (0); 259 } 260 261 262 /* 263 * It finds the apic_irq_t associates with the dip, ispec and type. 264 */ 265 apic_irq_t * 266 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type) 267 { 268 apic_irq_t *irqp; 269 int i; 270 271 DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x " 272 "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec, 273 ispec->intrspec_pri, type)); 274 275 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 276 if ((irqp = apic_irq_table[i]) == NULL) 277 continue; 278 if ((irqp->airq_dip == dip) && 279 (irqp->airq_origirq == ispec->intrspec_vec) && 280 (irqp->airq_ipl == ispec->intrspec_pri)) { 281 if (type == DDI_INTR_TYPE_MSI) { 282 if (irqp->airq_mps_intr_index == MSI_INDEX) 283 return (irqp); 284 } else if (type == DDI_INTR_TYPE_MSIX) { 285 if (irqp->airq_mps_intr_index == MSIX_INDEX) 286 return (irqp); 287 } else 288 return (irqp); 289 } 290 } 291 DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n")); 292 return (NULL); 293 } 294 295 296 #if !defined(__xpv) 297 298 /* 299 * This function will return the pending bit of the irqp. 300 * It either comes from the IRR register of the APIC or the RDT 301 * entry of the I/O APIC. 
 * For the IRR to work, it needs to be to its binding CPU
 */
static int
apic_get_pending(apic_irq_t *irqp, int type)
{
	int		bit, index, irr, pending;
	int		intin_no;
	int		apic_ix;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
	    type));

	/*
	 * need to get on the bound cpu: the local APIC IRR is per-CPU, so
	 * temporarily bind this thread to the interrupt's CPU (cpu_lock
	 * protects the affinity change).
	 */
	mutex_enter(&cpu_lock);
	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);

	/* IRR is a 256-bit register read 32 bits at a time */
	index = irqp->airq_vector / 32;
	bit = irqp->airq_vector % 32;
	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);

	affinity_clear();
	mutex_exit(&cpu_lock);

	pending = (irr & (1 << bit)) ? 1 : 0;
	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
		/* check I/O APIC for fixed interrupt */
		intin_no = irqp->airq_intin_no;
		apic_ix = irqp->airq_ioapicindex;
		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
		    AV_PENDING) ? 1 : 0;
	}
	return (pending);
}


/*
 * This function will clear the mask for the interrupt on the I/O APIC
 */
static void
apic_clear_mask(apic_irq_t *irqp)
{
	int		intin_no;
	ulong_t		iflag;
	int32_t		rdt_entry;
	int		apic_ix;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
	    (void *)irqp));

	intin_no = irqp->airq_intin_no;
	apic_ix = irqp->airq_ioapicindex;

	/* RDT read-modify-write under apic_ioapic_lock, interrupts off */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);

	/* clear mask */
	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
	    ((~AV_MASK) & rdt_entry));

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);
}


/*
 * This function will mask the interrupt on the I/O APIC
 */
static void
apic_set_mask(apic_irq_t *irqp)
{
	int		intin_no;
	int		apic_ix;
	ulong_t		iflag;
	int32_t		rdt_entry;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));

	intin_no = irqp->airq_intin_no;
	apic_ix = irqp->airq_ioapicindex;

	/* RDT read-modify-write under apic_ioapic_lock, interrupts off */
	iflag = intr_clear();

	lock_set(&apic_ioapic_lock);

	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);

	/* mask it */
	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
	    (AV_MASK | rdt_entry));

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);
}


/*
 * Release "count" MSI/MSI-X vectors previously allocated to dip starting
 * at inum/pri: each matching apic_irq_table entry is marked FREE_INDEX and
 * its vector returned to the APIC_RESV_IRQ pool.  Entries that cannot be
 * found are logged and skipped; non-MSI/X types are a no-op.
 */
void
apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
{
	int i;
	apic_irq_t *irqptr;
	struct intrspec ispec;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
	    "count: %x pri: %x type: %x\n",
	    (void *)dip, inum, count, pri, type));

	/* for MSI/X only */
	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
		return;

	for (i = 0; i < count; i++) {
		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
		    "pri=0x%x count=0x%x\n", inum, pri, count));
		ispec.intrspec_vec = inum + i;
		ispec.intrspec_pri = pri;
		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
			    "failed\n", (void *)dip, inum, pri));
			continue;
		}
		irqptr->airq_mps_intr_index = FREE_INDEX;
		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
	}
}

#endif	/* ! __xpv */

/*
 * check whether the system supports MSI
 *
 * If PCI-E capability is found, then this must be a PCI-E system.
 * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
 * to indicate this system supports MSI.
439 */ 440 int 441 apic_check_msi_support() 442 { 443 dev_info_t *cdip; 444 char dev_type[16]; 445 int dev_len; 446 447 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n")); 448 449 /* 450 * check whether the first level children of root_node have 451 * PCI-E capability 452 */ 453 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL; 454 cdip = ddi_get_next_sibling(cdip)) { 455 456 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p," 457 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip, 458 ddi_driver_name(cdip), ddi_binding_name(cdip), 459 ddi_node_name(cdip))); 460 dev_len = sizeof (dev_type); 461 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 462 "device_type", (caddr_t)dev_type, &dev_len) 463 != DDI_PROP_SUCCESS) 464 continue; 465 if (strcmp(dev_type, "pciex") == 0) 466 return (PSM_SUCCESS); 467 } 468 469 /* MSI is not supported on this system */ 470 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' " 471 "device_type found\n")); 472 return (PSM_FAILURE); 473 } 474 475 #if !defined(__xpv) 476 477 /* 478 * apic_pci_msi_unconfigure: 479 * 480 * This and next two interfaces are copied from pci_intr_lib.c 481 * Do ensure that these two files stay in sync. 482 * These needed to be copied over here to avoid a deadlock situation on 483 * certain mp systems that use MSI interrupts. 484 * 485 * IMPORTANT regards next three interfaces: 486 * i) are called only for MSI/X interrupts. 
 * ii) called with interrupts disabled, and must not block
 */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
	ushort_t	msi_ctrl;
	int		cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);

	ASSERT((handle != NULL) && (cap_ptr != 0));

	if (type == DDI_INTR_TYPE_MSI) {
		/* Clear MME (vector count) and zero the address/data pair */
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		msi_ctrl &= (~PCI_MSI_MME_MASK);
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);

		if (msi_ctrl & PCI_MSI_64BIT_MASK) {
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
			pci_config_put32(handle,
			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
		} else {
			pci_config_put16(handle,
			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
		}

	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t	off;
		uint32_t	mask;
		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);

		/* Offset into "inum"th entry in the MSI-X table & mask it */
		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

		/* bit 0 of vector control is the per-vector mask bit */
		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));

		/* Offset into the "inum"th entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (inum * PCI_MSIX_VECTOR_SIZE);

		/* Reset the "data" and "addr" bits */
		ddi_put32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
	}
}


/*
 * apic_pci_msi_enable_mode:
 *
 * Turn on MSI/MSI-X delivery for rdip.  For MSI-X, first clear the
 * per-vector mask bit of entry "inum", then set the global MSI-X enable
 * bit if it is not already set.  Idempotent.
 */
void
apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
{
	ushort_t	msi_ctrl;
	int		cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);

	ASSERT((handle != NULL) && (cap_ptr != 0));

	if (type == DDI_INTR_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
			return;

		msi_ctrl |= PCI_MSI_ENABLE_BIT;
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t	off;
		uint32_t	mask;
		ddi_intr_msix_t	*msix_p;

		msix_p = i_ddi_get_msix(rdip);

		/* Offset into "inum"th entry in the MSI-X table & clear mask */
		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));

		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);

		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
			    msi_ctrl);
		}
	}
}

/*
 * apic_pci_msi_disable_mode:
 *
 * Clear the global MSI or MSI-X enable bit for rdip.  Idempotent: a
 * no-op if the corresponding enable bit is already clear.
 */
void
apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
{
	ushort_t	msi_ctrl;
	int		cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);

	ASSERT((handle != NULL) && (cap_ptr != 0));

	if (type == DDI_INTR_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
			return;

		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

	} else if (type == DDI_INTR_TYPE_MSIX) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
			    msi_ctrl);
		}
	}
}


/*
 * Rebind a single interrupt (irqno) to the given cpu.  On failure,
 * *result is set to an errno (ENXIO/EIO) and PSM_FAILURE is returned.
 */
static int
apic_set_cpu(int irqno, int cpu, int *result)
{
	apic_irq_t *irqp;
	ulong_t iflag;
	int ret;

	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));

	mutex_enter(&airq_mutex);
	irqp = apic_irq_table[irqno];
	mutex_exit(&airq_mutex);

	if (irqp == NULL) {
		*result = ENXIO;
		return (PSM_FAILURE);
	}

	/*
	 * Fail if this is an MSI intr and is part of a group.
	 * NOTE(review): for MSI entries airq_intin_no appears to carry the
	 * vector count of the group (see apic_grp_set_cpu() below, which
	 * reads it as num_vectors) — confirm against the allocation path.
	 */
	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
	    (irqp->airq_intin_no > 1)) {
		*result = ENXIO;
		return (PSM_FAILURE);
	}

	/* Rebind under apic_ioapic_lock with interrupts disabled */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	ret = apic_rebind_all(irqp, cpu);

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	if (ret) {
		*result = EIO;
		return (PSM_FAILURE);
	}
	/*
	 * keep tracking the default interrupt cpu binding
	 */
	irqp->airq_cpu = cpu;

	*result = 0;
	return (PSM_SUCCESS);
}

/*
 * Rebind an entire MSI group (base irqno plus its contiguous vectors) to
 * new_cpu.  While the group is moved, all vectors are masked via MSI
 * per-vector masking when the device supports it.  If the first rebind
 * fails the group is left on its original CPU.  *result receives 0 on
 * success or an errno; returns PSM_SUCCESS/PSM_FAILURE accordingly.
 */
static int
apic_grp_set_cpu(int irqno, int new_cpu, int *result)
{
	dev_info_t	*orig_dip;
	uint32_t	orig_cpu;
	ulong_t		iflag;
	apic_irq_t	*irqps[PCI_MSI_MAX_INTRS];
	int		i;
	int		cap_ptr;
	int		msi_mask_off;
	ushort_t	msi_ctrl;
	uint32_t	msi_pvm;
	ddi_acc_handle_t handle;
	int		num_vectors = 0;
	uint32_t	vector;

	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));

	/*
	 * Take mutex to insure that table doesn't change out from underneath
	 * us while we're playing with it.
	 */
	mutex_enter(&airq_mutex);
	irqps[0] = apic_irq_table[irqno];
	orig_cpu = irqps[0]->airq_temp_cpu;
	orig_dip = irqps[0]->airq_dip;
	num_vectors = irqps[0]->airq_intin_no;
	vector = irqps[0]->airq_vector;

	/* A "group" of 1 */
	if (num_vectors == 1) {
		mutex_exit(&airq_mutex);
		return (apic_set_cpu(irqno, new_cpu, result));
	}

	*result = ENXIO;

	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
		mutex_exit(&airq_mutex);
		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
		goto set_grp_intr_done;
	}
	/* MSI groups are power-of-2 sized and the base vector is aligned */
	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
		mutex_exit(&airq_mutex);
		DDI_INTR_IMPLDBG((CE_CONT,
		    "set_grp: base vec not part of a grp or not aligned: "
		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
		goto set_grp_intr_done;
	}
	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
	    num_vectors));

	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);

	*result = EIO;

	/*
	 * All IRQ entries in the table for the given device will be not
	 * shared. Since they are not shared, the dip in the table will
	 * be true to the device of interest.
	 */
	for (i = 1; i < num_vectors; i++) {
		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
		if (irqps[i] == NULL) {
			mutex_exit(&airq_mutex);
			goto set_grp_intr_done;
		}
#ifdef DEBUG
		/* Sanity check: CPU and dip is the same for all entries. */
		if ((irqps[i]->airq_dip != orig_dip) ||
		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
			mutex_exit(&airq_mutex);
			DDI_INTR_IMPLDBG((CE_CONT,
			    "set_grp: cpu or dip for vec 0x%x difft than for "
			    "vec 0x%x\n", vector, vector + i));
			DDI_INTR_IMPLDBG((CE_CONT,
			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
			    (void *)irqps[i]->airq_dip));
			goto set_grp_intr_done;
		}
#endif /* DEBUG */
	}
	mutex_exit(&airq_mutex);

	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
	handle = i_ddi_get_pci_config_handle(orig_dip);
	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);

	/* MSI Per vector masking is supported. */
	if (msi_ctrl & PCI_MSI_PVM_MASK) {
		if (msi_ctrl & PCI_MSI_64BIT_MASK)
			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
		else
			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
		/* Save current mask, then mask all vectors for the move */
		msi_pvm = pci_config_get32(handle, msi_mask_off);
		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
		DDI_INTR_IMPLDBG((CE_CONT,
		    "set_grp: pvm supported. Mask set to 0x%x\n",
		    pci_config_get32(handle, msi_mask_off)));
	}

	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	/*
	 * Do the first rebind and check for errors. Apic_rebind_all returns
	 * an error if the CPU is not accepting interrupts. If the first one
	 * succeeds they all will.
	 */
	if (apic_rebind_all(irqps[0], new_cpu))
		(void) apic_rebind_all(irqps[0], orig_cpu);
	else {
		irqps[0]->airq_cpu = new_cpu;

		for (i = 1; i < num_vectors; i++) {
			(void) apic_rebind_all(irqps[i], new_cpu);
			irqps[i]->airq_cpu = new_cpu;
		}
		*result = 0;	/* SUCCESS */
	}

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	/* Reenable vectors if per vector masking is supported. */
	if (msi_ctrl & PCI_MSI_PVM_MASK) {
		pci_config_put32(handle, msi_mask_off, msi_pvm);
		DDI_INTR_IMPLDBG((CE_CONT,
		    "set_grp: pvm supported. Mask restored to 0x%x\n",
		    pci_config_get32(handle, msi_mask_off)));
	}

set_grp_intr_done:
	if (*result != 0)
		return (PSM_FAILURE);

	return (PSM_SUCCESS);
}

#else	/* !__xpv */

/*
 * We let the hypervisor deal with msi configuration
 * so just stub these out.
 */

/* ARGSUSED */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
}

/* ARGSUSED */
void
apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
{
}

/* ARGSUSED */
void
apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
{
}

#endif	/* __xpv */

/*
 * Gather information about the interrupt identified by vecirq (a vector
 * or an irq, per avgi_req_flags) into *intr_params_p: bound CPU, vector,
 * number of sharing devices and, on request, a kmem-allocated list of
 * their dips (caller frees).  Returns PSM_FAILURE for unknown/unbound
 * interrupts.
 */
int
apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
{
	struct autovec *av_dev;
	uchar_t irqno;
	int i;
	apic_irq_t *irq_p;

	/*
	 * Sanity check the vector/irq argument.
	 * NOTE(review): with "||" this assertion is vacuously true for any
	 * int; "&&" was almost certainly intended.
	 */
	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));

	mutex_enter(&airq_mutex);

	/*
	 * Convert the vecirq arg to an irq using vector_to_irq table
	 * if the arg is a vector. Pass thru if already an irq.
	 */
	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
	    PSMGI_INTRBY_VEC)
		irqno = apic_vector_to_irq[vecirq];
	else
		irqno = vecirq;

	irq_p = apic_irq_table[irqno];

	if ((irq_p == NULL) ||
	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
		mutex_exit(&airq_mutex);
		return (PSM_FAILURE);
	}

	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {

		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;

		/* Return user bound info for intrd. */
		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
		}
	}

	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
		intr_params_p->avgi_vector = irq_p->airq_vector;

	if (intr_params_p->avgi_req_flags &
	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
		/* Get number of devices from apic_irq table shared field. */
		intr_params_p->avgi_num_devs = irq_p->airq_share;

	if (intr_params_p->avgi_req_flags & PSMGI_REQ_GET_DEVS) {

		intr_params_p->avgi_req_flags |= PSMGI_REQ_NUM_DEVS;

		/* Some devices have NULL dip. Don't count these. */
		if (intr_params_p->avgi_num_devs > 0) {
			for (i = 0, av_dev = autovect[irqno].avh_link;
			    av_dev; av_dev = av_dev->av_link)
				if (av_dev->av_vector && av_dev->av_dip)
					i++;
			intr_params_p->avgi_num_devs =
			    MIN(intr_params_p->avgi_num_devs, i);
		}

		/* There are no viable dips to return. */
		if (intr_params_p->avgi_num_devs == 0)
			intr_params_p->avgi_dip_list = NULL;

		else {	/* Return list of dips */

			/*
			 * Allocate space in array for that number of devs.
			 * NOTE(review): KM_SLEEP allocation while holding
			 * airq_mutex — may block; confirm airq_mutex is an
			 * adaptive mutex acquired only from base level.
			 */
			intr_params_p->avgi_dip_list = kmem_zalloc(
			    intr_params_p->avgi_num_devs *
			    sizeof (dev_info_t *),
			    KM_SLEEP);

			/*
			 * Loop through the device list of the autovec table
			 * filling in the dip array.
			 *
			 * Note that the autovect table may have some special
			 * entries which contain NULL dips. These will be
			 * ignored.
			 */
			for (i = 0, av_dev = autovect[irqno].avh_link;
			    av_dev; av_dev = av_dev->av_link)
				if (av_dev->av_vector && av_dev->av_dip)
					intr_params_p->avgi_dip_list[i++] =
					    av_dev->av_dip;
		}
	}

	mutex_exit(&airq_mutex);

	return (PSM_SUCCESS);
}


#if !defined(__xpv)

/*
 * This function provides external interface to the nexus for all
 * functionalities related to the new DDI interrupt framework.
 *
 * Input:
 * dip - pointer to the dev_info structure of the requested device
 * hdlp - pointer to the internal interrupt handle structure for the
 * requested interrupt
 * intr_op - opcode for this call
 * result - pointer to the integer that will hold the result to be
 * passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 */
int
apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
	int		cap;
	int		count_vec;
	int		old_priority;
	int		new_priority;
	int		new_cpu;
	apic_irq_t	*irqp;
	struct intrspec *ispec, intr_spec;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));

	/* Build a local intrspec from the handle for the lookup helpers */
	ispec = &intr_spec;
	ispec->intrspec_pri = hdlp->ih_pri;
	ispec->intrspec_vec = hdlp->ih_inum;
	ispec->intrspec_func = hdlp->ih_cb_func;

	switch (intr_op) {
	case PSM_INTR_OP_CHECK_MSI:
		/*
		 * Check MSI/X is supported or not at APIC level and
		 * masked off the MSI/X bits in hdlp->ih_type if not
		 * supported before return. If MSI/X is supported,
		 * leave the ih_type unchanged and return.
		 *
		 * hdlp->ih_type passed in from the nexus has all the
		 * interrupt types supported by the device.
		 */
		if (apic_support_msi == 0) {
			/*
			 * if apic_support_msi is not set, call
			 * apic_check_msi_support() to check whether msi
			 * is supported first
			 */
			if (apic_check_msi_support() == PSM_SUCCESS)
				apic_support_msi = 1;
			else
				apic_support_msi = -1;
		}
		if (apic_support_msi == 1) {
			if (apic_msix_enable)
				*result = hdlp->ih_type;
			else
				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
		} else
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
		break;
	case PSM_INTR_OP_ALLOC_VECTORS:
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		else
			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		break;
	case PSM_INTR_OP_FREE_VECTORS:
		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
		    hdlp->ih_pri, hdlp->ih_type);
		break;
	case PSM_INTR_OP_NAVAIL_VECTORS:
		*result = apic_navail_vector(dip, hdlp->ih_pri);
		break;
	case PSM_INTR_OP_XLATE_VECTOR:
		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
		break;
	case PSM_INTR_OP_GET_PENDING:
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		*result = apic_get_pending(irqp, hdlp->ih_type);
		break;
	case PSM_INTR_OP_CLEAR_MASK:
		/* Masking at the I/O APIC only applies to fixed interrupts */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
		if (irqp == NULL)
			return (PSM_FAILURE);
		apic_clear_mask(irqp);
		break;
	case PSM_INTR_OP_SET_MASK:
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		apic_set_mask(irqp);
		break;
	case PSM_INTR_OP_GET_CAP:
		/* Fixed intrs are maskable; MSI-X vectors are retargetable */
		cap = DDI_INTR_FLAG_PENDING;
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			cap |= DDI_INTR_FLAG_MASKABLE;
		else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX)
			cap |= DDI_INTR_FLAG_RETARGETABLE;
		*result = cap;
		break;
	case PSM_INTR_OP_GET_SHARED:
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		*result = irqp->airq_share ? 1: 0;
		break;
	case PSM_INTR_OP_SET_PRI:
		old_priority = hdlp->ih_pri;	/* save old value */
		new_priority = *(int *)result;	/* try the new value */

		/* First, check if "hdlp->ih_scratch1" vectors exist? */
		if (apic_navail_vector(dip, new_priority) < hdlp->ih_scratch1)
			return (PSM_FAILURE);

		/* Now allocate the vectors */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, new_priority,
			    DDI_INTR_ALLOC_STRICT);
		else
			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, new_priority,
			    DDI_INTR_ALLOC_STRICT);

		/* Did we get new vectors? */
		if (!count_vec)
			return (PSM_FAILURE);

		/* Finally, free the previously allocated vectors */
		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
		    old_priority, hdlp->ih_type);
		hdlp->ih_pri = new_priority; /* set the new value */
		break;
	case PSM_INTR_OP_SET_CPU:
	case PSM_INTR_OP_GRP_SET_CPU:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a CPU value.
		 */
		new_cpu = (int)(intptr_t)hdlp->ih_private;
		if (!apic_cpu_in_range(new_cpu)) {
			DDI_INTR_IMPLDBG((CE_CONT,
			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
			*result = EINVAL;
			return (PSM_FAILURE);
		}
		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
			DDI_INTR_IMPLDBG((CE_CONT,
			    "[grp_]set_cpu: vector out of range: %d\n",
			    hdlp->ih_vector));
			*result = EINVAL;
			return (PSM_FAILURE);
		}
		/* ih_vector may hold a vector or an irq; normalize to irq */
		if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ))
			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
		if (intr_op == PSM_INTR_OP_SET_CPU) {
			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
			    PSM_SUCCESS)
				return (PSM_FAILURE);
		} else {
			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
			    result) != PSM_SUCCESS)
				return (PSM_FAILURE);
		}
		break;
	case PSM_INTR_OP_GET_INTR:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a pointer to a apic_get_intr_t.
		 */
		if (apic_get_vector_intr_info(
		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_APIC_TYPE:
		hdlp->ih_private = apic_get_apic_type();
		hdlp->ih_ver = apic_get_apic_version();
		break;
	case PSM_INTR_OP_SET_CAP:
	default:
		/* SET_CAP is unsupported; unknown ops fail */
		return (PSM_FAILURE);
	}
	return (PSM_SUCCESS);
}
#endif	/* !__xpv */