/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

#define	PSMI_1_7

#include <sys/mutex.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/machlock.h>
#include <sys/smp_impldefs.h>
#include <sys/uadmin.h>
#include <sys/promif.h>
#include <sys/psm.h>
#include <sys/psm_common.h>
#include <sys/atomic.h>
#include <sys/apic.h>
#include <sys/archsystm.h>
#include <sys/mach_intr.h>
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/modctl.h>
#include <sys/trap.h>
#include <sys/panic.h>
#include <sys/sysmacros.h>
#include <sys/pci_intr_lib.h>
#include <vm/hat_i86.h>

#include <xen/public/vcpu.h>
#include <xen/public/physdev.h>


/*
 * Global Data
 */

int xen_psm_verbose = 0;

/* As of now we don't support x2apic in xVM */
volatile uint32_t *apicadr = NULL;	/* dummy, so common code will link */
int apic_error = 0;
int apic_verbose = 0;
cpuset_t apic_cpumask;
int apic_forceload = 0;
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
};
uchar_t apic_ipltopri[MAXIPL + 1];
uchar_t apic_ipls[APIC_AVAIL_VECTOR];
uint_t apic_picinit_called;
apic_cpus_info_t *apic_cpus;
int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
/* used to make sure only one cpu handles the nmi */
static lock_t xen_psm_nmi_lock;
int xen_psm_kmdb_on_nmi = 0;	/* 0 - no, 1 - yes enter kmdb */
int xen_psm_panic_on_nmi = 0;
int xen_psm_num_nmis = 0;

cpuset_t xen_psm_cpus_online;	/* online cpus */
int xen_psm_ncpus = 1;		/* cpu count */
int xen_psm_next_bind_cpu;	/* next cpu to bind an interrupt to */

int xen_support_msi = 0;

static int xen_clock_irq = INVALID_IRQ;

/* flag definitions for xen_psm_verbose */
#define	XEN_PSM_VERBOSE_IRQ_FLAG		0x00000001
#define	XEN_PSM_VERBOSE_POWEROFF_FLAG		0x00000002
#define	XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000004

#define	XEN_PSM_VERBOSE_IRQ(fmt) \
	if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \
		cmn_err fmt;

#define	XEN_PSM_VERBOSE_POWEROFF(fmt) \
	if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \
		prom_printf fmt;

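/*
 * Usage sketch (hypothetical settings): patching xen_psm_verbose to 1
 * (XEN_PSM_VERBOSE_IRQ_FLAG) with kmdb/mdb, or via an /etc/system "set"
 * directive for this module, makes the XEN_PSM_VERBOSE_IRQ() calls below
 * log through cmn_err(); setting the poweroff flag instead routes
 * shutdown tracing to prom_printf().
 */
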
/*
 * Dummy apic array to point common routines at that want to do some apic
 * manipulation.  Xen doesn't allow guest apic access so we point at these
 * memory locations to fake out those who want to do apic fiddling.
 */
uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1];

static struct psm_info xen_psm_info;
static void xen_psm_setspl(int);

int
apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior);
int
apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior);

/*
 * Local support routines
 */

/*
 * Select vcpu to bind xen virtual device interrupt to.
 */
/*ARGSUSED*/
int
xen_psm_bind_intr(int irq)
{
	int bind_cpu;
	apic_irq_t *irqptr;

	bind_cpu = IRQ_UNBOUND;
	if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY)
		return (bind_cpu);
	if (irq <= APIC_MAX_VECTOR)
		irqptr = apic_irq_table[irq];
	else
		irqptr = NULL;
	if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND))
		bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND;
	if (bind_cpu != IRQ_UNBOUND) {
		if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu))
			bind_cpu = 0;
		goto done;
	}
	if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
		do {
			bind_cpu = xen_psm_next_bind_cpu++;
			if (xen_psm_next_bind_cpu >= xen_psm_ncpus)
				xen_psm_next_bind_cpu = 0;
		} while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu));
	} else {
		bind_cpu = 0;
	}
done:
	return (bind_cpu);
}

/*
 * Autoconfiguration Routines
 */

static int
xen_psm_probe(void)
{
	int ret = PSM_SUCCESS;

	if (DOMAIN_IS_INITDOMAIN(xen_info))
		ret = apic_probe_common(xen_psm_info.p_mach_idstring);
	return (ret);
}

static void
xen_psm_softinit(void)
{
	/* LINTED logical expression always true: op "||" */
	ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t));
	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0);
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		apic_init_common();
	}
}

#define	XEN_NSEC_PER_TICK	10 /* XXX - assume we have a 100 MHz clock */

/*ARGSUSED*/
static int
xen_psm_clkinit(int hertz)
{
	extern enum tod_fault_type tod_fault(enum tod_fault_type, int);
	extern int dosynctodr;

	/*
	 * domU cannot set the TOD hardware, fault the TOD clock now to
	 * indicate that and turn off attempts to sync TOD hardware
	 * with the hires timer.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		mutex_enter(&tod_lock);
		(void) tod_fault(TOD_RDONLY, 0);
		dosynctodr = 0;
		mutex_exit(&tod_lock);
	}
	/*
	 * The hypervisor provides a timer based on the local APIC timer.
	 * The interface supports requests of nanosecond resolution.
	 * A common frequency of the apic clock is 100 MHz, which
	 * gives a resolution of 10 nsec per tick.  What we would really
	 * like is a way to get the ns-per-tick value from xen.
	 * XXPV - This is an assumption that needs checking and may change
	 */
	return (XEN_NSEC_PER_TICK);
}

static void
xen_psm_hrtimeinit(void)
{
	extern int gethrtime_hires;
	gethrtime_hires = 1;
}

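/*
 * Note that the NMI handler below serializes with a non-blocking
 * lock_try() rather than spinning: an NMI arriving on another cpu while
 * one is already being handled is simply returned as unclaimed.
 */
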
/* xen_psm NMI handler */
static uint_t
xen_psm_nmi_intr(caddr_t arg __unused, caddr_t arg1 __unused)
{
	xen_psm_num_nmis++;

	if (!lock_try(&xen_psm_nmi_lock))
		return (DDI_INTR_UNCLAIMED);

	if (xen_psm_kmdb_on_nmi && psm_debugger()) {
		debug_enter("NMI received: entering kmdb\n");
	} else if (xen_psm_panic_on_nmi) {
		/* Keep panic from entering kmdb. */
		nopanicdebug = 1;
		panic("NMI received\n");
	} else {
		/*
		 * prom_printf is the best shot we have of something which is
		 * problem free from high level/NMI type of interrupts
		 */
		prom_printf("NMI received\n");
	}

	lock_clear(&xen_psm_nmi_lock);
	return (DDI_INTR_CLAIMED);
}

static void
xen_psm_picinit(void)
{
	int cpu, irqno;
	cpuset_t cpus;

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* set a flag so we know we have run xen_psm_picinit() */
		apic_picinit_called = 1;
		LOCK_INIT_CLEAR(&apic_ioapic_lock);

		/* XXPV - do we need to do this? */
		picsetup();	/* initialise the 8259 */

		/* enable apic mode if imcr present */
		/* XXPV - do we need to do this either? */
		if (apic_imcrp) {
			outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
			outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
		}

		ioapic_init_intr(IOAPIC_NOMASK);
		/*
		 * We never called xen_psm_addspl() when the SCI
		 * interrupt was added because that happened before the
		 * PSM module was loaded.  Fix that up here by doing
		 * any missed operations (e.g. bind to CPU)
		 */
		if ((irqno = apic_sci_vect) > 0) {
			if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
				CPUSET_ZERO(cpus);
				CPUSET_OR(cpus, xen_psm_cpus_online);
			} else {
				CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
			}
			ec_set_irq_affinity(irqno, cpus);
			apic_irq_table[irqno]->airq_temp_cpu =
			    (uchar_t)(cpu & ~IRQ_USER_BOUND);
			ec_enable_irq(irqno);
		}
	}

	/* add nmi handler - least priority nmi handler */
	LOCK_INIT_CLEAR(&xen_psm_nmi_lock);

	if (!psm_add_nmintr(0, xen_psm_nmi_intr,
	    "xVM_psm NMI handler", (caddr_t)NULL))
		cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler");
}


/*
 * generates an interprocessor interrupt to another CPU
 */
static void
xen_psm_send_ipi(int cpun, int ipl)
{
	ulong_t flag = intr_clear();

	ec_send_ipi(ipl, cpun);
	intr_restore(flag);
}

/*ARGSUSED*/
static int
xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int cpu, ret;
	cpuset_t cpus;

	/*
	 * We are called at splhi() so we can't call anything that might end
	 * up trying to context switch.
	 */
	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
	    DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * Priority/affinity/enable for PIRQ's is set in
		 * ec_setup_pirq()
		 */
		ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
	} else {
		/*
		 * Set priority/affinity/enable for non PIRQs
		 */
		ret = ec_set_irq_priority(irqno, ipl);
		ASSERT(ret == 0);
		if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
			CPUSET_ZERO(cpus);
			CPUSET_OR(cpus, xen_psm_cpus_online);
		} else {
			CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
		}
		ec_set_irq_affinity(irqno, cpus);
		ec_enable_irq(irqno);
	}
	return (ret);
}

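/*
 * xen_psm_addspl() above and xen_psm_delspl() below are the PSM
 * addspl/delspl entry points, reached through the common autovector
 * code (add_avintr()/rem_avintr()) when a handler is attached to or
 * detached from an irq.
 */
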
/*
 * Acquire ownership of this irq on this cpu
 */
void
xen_psm_acquire_irq(int irq)
{
	ulong_t flags;
	int cpuid;

	/*
	 * If the irq is currently being serviced by another cpu
	 * we busy-wait for the other cpu to finish.  Take any
	 * pending interrupts before retrying.
	 */
	do {
		flags = intr_clear();
		cpuid = ec_block_irq(irq);
		intr_restore(flags);
	} while (cpuid != CPU->cpu_id);
}

/*ARGSUSED*/
static int
xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	apic_irq_t *irqptr;
	int err = PSM_SUCCESS;

	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
	    DOMAIN_IS_INITDOMAIN(xen_info)) {
		irqptr = apic_irq_table[irqno];
		/*
		 * unbind if no more sharers of this irq/evtchn
		 */
		if (irqptr->airq_share == 1) {
			xen_psm_acquire_irq(irqno);
			ec_unbind_irq(irqno);
		}
		err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);
		/*
		 * If still in use reset priority
		 */
		if (!err && irqptr->airq_share != 0) {
			err = ec_set_irq_priority(irqno, max_ipl);
			return (err);
		}
	} else {
		xen_psm_acquire_irq(irqno);
		ec_unbind_irq(irqno);
	}
	return (err);
}

static processorid_t
xen_psm_get_next_processorid(processorid_t id)
{
	if (id == -1)
		return (0);

	for (id++; id < NCPU; id++) {
		switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) {
		case 0:		/* yeah, that one's there */
			return (id);
		default:
		case X_EINVAL:	/* out of range */
			return (-1);
		case X_ENOENT:	/* not present in the domain */
			/*
			 * It's not clear that we -need- to keep looking
			 * at this point, if, e.g., we can guarantee
			 * the hypervisor always keeps a contiguous range
			 * of vcpus around this is equivalent to "out of
			 * range".
			 *
			 * But it would be sad to miss a vcpu we're
			 * supposed to be using ..
			 */
			break;
		}
	}

	return (-1);
}

/*
 * XXPV - undo the start cpu op change; return to ignoring this value
 *	- also tweak error handling in main startup loop
 */
/*ARGSUSED*/
static int
xen_psm_cpu_start(processorid_t id, caddr_t arg)
{
	int ret;

	ASSERT(id > 0);
	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id);
	ec_bind_cpu_ipis(id);
	(void) ec_bind_virq_to_irq(VIRQ_TIMER, id);
	if ((ret = xen_vcpu_up(id)) == 0)
		xen_psm_ncpus++;
	else
		ret = EINVAL;
	return (ret);
}

/*
 * Allocate an irq for inter cpu signaling
 */
/*ARGSUSED*/
static int
xen_psm_get_ipivect(int ipl, int type)
{
	return (ec_bind_ipi_to_irq(ipl, 0));
}

/*ARGSUSED*/
static int
xen_psm_get_clockirq(int ipl)
{
	if (xen_clock_irq != INVALID_IRQ)
		return (xen_clock_irq);

	xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0);
	return (xen_clock_irq);
}

/*ARGSUSED*/
static void
xen_psm_shutdown(int cmd, int fcn)
{
	XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn));

	switch (cmd) {
	case A_SHUTDOWN:
		switch (fcn) {
		case AD_BOOT:
		case AD_IBOOT:
			(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
			break;
		case AD_POWEROFF:
			/* fall through if domU or if poweroff fails */
			if (DOMAIN_IS_INITDOMAIN(xen_info))
				if (apic_enable_acpi)
					(void) acpi_poweroff();
			/* FALLTHRU */
		case AD_HALT:
		default:
			(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
			break;
		}
		break;
	case A_REBOOT:
		(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
		break;
	default:
		return;
	}
}

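/*
 * For example, a uadmin(A_SHUTDOWN, AD_POWEROFF, ...) request from a
 * domU takes the AD_POWEROFF arm above, skips the ACPI path (dom0
 * only), and falls through to HYPERVISOR_shutdown(SHUTDOWN_poweroff).
 */
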
static int
xen_psm_translate_irq(dev_info_t *dip, int irqno)
{
	if (dip == NULL) {
		XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d"
		    " dip = NULL\n", irqno));
		return (irqno);
	}
	return (irqno);
}

/*
 * xen_psm_intr_enter() acks the event that triggered the interrupt and
 * returns the new priority level.
 */
/*ARGSUSED*/
static int
xen_psm_intr_enter(int ipl, int *vector)
{
	int newipl;
	uint_t intno;
	cpu_t *cpu = CPU;

	intno = (*vector);

	ASSERT(intno < NR_IRQS);
	ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);

	if (!ec_is_edge_pirq(intno))
		ec_clear_irq(intno);

	newipl = autovect[intno].avh_hi_pri;
	if (newipl == 0) {
		/*
		 * (newipl == 0) means we have no service routines for this
		 * vector.  We will treat this as a spurious interrupt.
		 * We have cleared the pending bit already, clear the event
		 * mask and return a spurious interrupt.  This case can happen
		 * when an interrupt delivery is racing with the removal of
		 * the service routine for that interrupt.
		 */
		ec_unmask_irq(intno);
		newipl = -1;	/* flag spurious interrupt */
	} else if (newipl <= cpu->cpu_pri) {
		/*
		 * (newipl <= cpu->cpu_pri) means that we must be trying to
		 * service a vector that was shared with a higher priority
		 * isr.  The higher priority handler has been removed and
		 * we need to service this int.  We can't return a lower
		 * priority than the current cpu priority.  Just synthesize a
		 * priority to return that should be acceptable.
		 * It should never happen that we synthesize a priority that
		 * moves us from low-priority to high-priority and would make
		 * us incorrectly run on the high priority stack.
		 */
		newipl = cpu->cpu_pri + 1;	/* synthetic priority */
		ASSERT(newipl != LOCK_LEVEL + 1);
	}
	return (newipl);
}


/*
 * xen_psm_intr_exit() restores the old interrupt
 * priority level after processing an interrupt.
 * It is called with interrupts disabled, and does not enable interrupts.
 */
/* ARGSUSED */
static void
xen_psm_intr_exit(int ipl, int vector)
{
	ec_try_unmask_irq(vector);
	xen_psm_setspl(ipl);
}

intr_exit_fn_t
psm_intr_exit_fn(void)
{
	return (xen_psm_intr_exit);
}

/*
 * Check if new ipl level allows delivery of previously unserviced events
 */
static void
xen_psm_setspl(int ipl)
{
	struct cpu *cpu = CPU;
	volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
	uint16_t pending;

	ASSERT(vci->evtchn_upcall_mask != 0);

	/*
	 * If the new ipl level will enable any pending interrupts, set up
	 * so the upcoming sti will cause us to get an upcall.
	 */
	pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1);
	if (pending) {
		int i;
		ulong_t pending_sels = 0;
		volatile ulong_t *selp;
		struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;

		for (i = bsrw_insn(pending); i > ipl; i--)
			pending_sels |= cpe->pending_sel[i];
		ASSERT(pending_sels);
		selp = (volatile ulong_t *)&vci->evtchn_pending_sel;
		atomic_or_ulong(selp, pending_sels);
		vci->evtchn_upcall_pending = 1;
	}
}

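/*
 * Worked example for the mask computation above: dropping to ipl 4 with
 * events pending at ipls 4 and 9 gives
 * pending & ~((1 << 5) - 1) = pending & ~0x1f, so only the ipl 9 bit
 * survives and an upcall is set up for it; the ipl 4 event stays
 * blocked until the priority drops further.
 */
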
/*
 * This function provides an external interface to the nexus for all
 * functionality related to the new DDI interrupt framework.
 *
 * Input:
 * dip     - pointer to the dev_info structure of the requested device
 * hdlp    - pointer to the internal interrupt handle structure for the
 *	     requested interrupt
 * intr_op - opcode for this call
 * result  - pointer to the integer that will hold the result to be
 *	     passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 */
int
xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
	int cap;
	int err;
	int new_priority;
	apic_irq_t *irqp;
	struct intrspec *ispec;

	DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p "
	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));

	switch (intr_op) {
	case PSM_INTR_OP_CHECK_MSI:
		/*
		 * Until PCI passthrough is supported, only dom0 has MSI/MSIX
		 */
		if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
			break;
		}
		/*
		 * Check whether MSI/X is supported at the APIC level and,
		 * if not, mask off the MSI/X bits in hdlp->ih_type before
		 * returning.  If MSI/X is supported, leave the ih_type
		 * unchanged and return.
		 *
		 * hdlp->ih_type passed in from the nexus has all the
		 * interrupt types supported by the device.
		 */
		if (xen_support_msi == 0) {
			/*
			 * if xen_support_msi is not set, call
			 * apic_check_msi_support() to check whether msi
			 * is supported first
			 */
			if (apic_check_msi_support() == PSM_SUCCESS)
				xen_support_msi = 1;
			else
				xen_support_msi = -1;
		}
		if (xen_support_msi == 1)
			*result = hdlp->ih_type;
		else
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
		break;
	case PSM_INTR_OP_ALLOC_VECTORS:
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		else
			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		break;
	case PSM_INTR_OP_FREE_VECTORS:
		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
		    hdlp->ih_pri, hdlp->ih_type);
		break;
	case PSM_INTR_OP_NAVAIL_VECTORS:
		/*
		 * XXPV - maybe we should make this be:
		 * min(APIC_VECTOR_PER_IPL, count of all avail vectors);
		 */
		if (DOMAIN_IS_INITDOMAIN(xen_info))
			*result = APIC_VECTOR_PER_IPL;
		else
			*result = 1;
		break;
	case PSM_INTR_OP_XLATE_VECTOR:
		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
		if (ispec->intrspec_vec >= PIRQ_BASE &&
		    ispec->intrspec_vec < NR_PIRQS &&
		    DOMAIN_IS_INITDOMAIN(xen_info)) {
			*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
		} else {
			*result = ispec->intrspec_vec;
		}
		break;
	case PSM_INTR_OP_GET_PENDING:
		/* XXPV - is this enough for dom0 or do we need to ref ioapic */
		*result = ec_pending_irq(hdlp->ih_vector);
		break;
	case PSM_INTR_OP_CLEAR_MASK:
		/* XXPV - is this enough for dom0 or do we need to set ioapic */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		ec_enable_irq(hdlp->ih_vector);
		break;
	case PSM_INTR_OP_SET_MASK:
		/* XXPV - is this enough for dom0 or do we need to set ioapic */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		ec_disable_irq(hdlp->ih_vector);
		break;
	case PSM_INTR_OP_GET_CAP:
		cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE;
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			cap |= DDI_INTR_FLAG_MASKABLE;
		*result = cap;
		break;
	case PSM_INTR_OP_GET_SHARED:
		if (DOMAIN_IS_INITDOMAIN(xen_info)) {
			if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
				return (PSM_FAILURE);
			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
			if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type))
			    == NULL)
				return (PSM_FAILURE);
			*result = (irqp->airq_share > 1) ? 1 : 0;
		} else {
			return (PSM_FAILURE);
		}
		break;
	case PSM_INTR_OP_SET_PRI:
		new_priority = *(int *)result;
		err = ec_set_irq_priority(hdlp->ih_vector, new_priority);
		if (err != 0)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_GET_INTR:
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			return (PSM_FAILURE);
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a pointer to an apic_get_intr_t.
		 */
		if (apic_get_vector_intr_info(
		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_SET_CAP:
		/* FALLTHRU */
	default:
		return (PSM_FAILURE);
	}
	return (PSM_SUCCESS);
}

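/*
 * Flow sketch: DDI consumers do not call xen_intr_ops() directly; a
 * ddi_intr_alloc() from a driver is funneled by the i86pc nexus through
 * psm_intr_ops to the PSM_INTR_OP_ALLOC_VECTORS case above, which in
 * turn lands in apic_alloc_msi_vectors() or apic_alloc_msix_vectors()
 * below for the MSI/MSIX types.
 */
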
static void
xen_psm_rebind_irq(int irq)
{
	cpuset_t ncpu;
	processorid_t newcpu;
	apic_irq_t *irqptr;

	newcpu = xen_psm_bind_intr(irq);
	if (newcpu == IRQ_UNBOUND) {
		CPUSET_ZERO(ncpu);
		CPUSET_OR(ncpu, xen_psm_cpus_online);
	} else {
		CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND);
	}
	ec_set_irq_affinity(irq, ncpu);
	if (irq <= APIC_MAX_VECTOR) {
		irqptr = apic_irq_table[irq];
		ASSERT(irqptr != NULL);
		irqptr->airq_temp_cpu = (uchar_t)newcpu;
	}
}

/*
 * Disable all device interrupts for the given cpu.
 * High priority interrupts are not disabled and will still be serviced.
 */
static int
xen_psm_disable_intr(processorid_t cpun)
{
	int irq;

	/*
	 * Can't offline VCPU 0 on this hypervisor.  There's no reason
	 * anyone would want to, given that the CPUs are virtual.  Also note
	 * that the hypervisor requires suspend/resume to be on VCPU 0.
	 */
	if (cpun == 0)
		return (PSM_FAILURE);

	CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun);
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!ec_irq_needs_rebind(irq, cpun))
			continue;
		xen_psm_rebind_irq(irq);
	}
	return (PSM_SUCCESS);
}

static void
xen_psm_enable_intr(processorid_t cpun)
{
	int irq;

	if (cpun == 0)
		return;

	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun);

	/*
	 * Rebalance device interrupts among online processors
	 */
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!ec_irq_rebindable(irq))
			continue;
		xen_psm_rebind_irq(irq);
	}

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
	}
}

static int
xen_psm_post_cpu_start(void)
{
	processorid_t cpun;

	cpun = psm_get_cpu_id();
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * Non-virtualized environments can call psm_post_cpu_start
		 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set.
		 * xen_psm_post_cpu_start() is only called from boot.
		 */
		apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
	}
	return (PSM_SUCCESS);
}

/*
 * This function will reprogram the timer.
 *
 * When in oneshot mode the argument is the absolute time in the future at
 * which to generate the interrupt.
 *
 * When in periodic mode, the argument is the interval at which the
 * interrupts should be generated.  There is no need to support the periodic
 * mode timer change at this time.
 *
 * Note that we must be careful to convert from hrtime to Xen system time (see
 * xpv_timestamp.c).
 */
static void
xen_psm_timer_reprogram(hrtime_t timer_req)
{
	hrtime_t now, timer_new, time_delta, xen_time;
	ulong_t flags;

	flags = intr_clear();
	/*
	 * We should be called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */

	now = xpv_gethrtime();
	xen_time = xpv_getsystime();
	if (timer_req <= now) {
		/*
		 * requested to generate an interrupt in the past;
		 * generate an interrupt as soon as possible
		 */
		time_delta = XEN_NSEC_PER_TICK;
	} else {
		time_delta = timer_req - now;
	}

	timer_new = xen_time + time_delta;
	if (HYPERVISOR_set_timer_op(timer_new) != 0)
		panic("can't set hypervisor timer?");
	intr_restore(flags);
}

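/*
 * Example of the conversion above (made-up numbers): if timer_req is
 * 5000ns past the current xpv_gethrtime() value, time_delta is 5000 and
 * the one-shot is armed at xen_time + 5000 in Xen system time; a
 * timer_req already in the past is clamped to XEN_NSEC_PER_TICK so the
 * interrupt fires as soon as possible.
 */
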
/*
 * This function will enable timer interrupts.
 */
static void
xen_psm_timer_enable(void)
{
	ec_unmask_irq(xen_clock_irq);
}

/*
 * This function will disable timer interrupts on the current cpu.
 */
static void
xen_psm_timer_disable(void)
{
	(void) ec_block_irq(xen_clock_irq);
	/*
	 * If the clock irq is pending on this cpu then we need to
	 * clear the pending interrupt.
	 */
	ec_unpend_irq(xen_clock_irq);
}

/*
 * The following functions are in the platform specific file so that they
 * can be different functions depending on whether we are running on
 * bare metal or a hypervisor.
 */

/*
 * Allocate a free vector for irq at ipl.
 */
/* ARGSUSED */
uchar_t
apic_allocate_vector(int ipl, int irq, int pri)
{
	physdev_irq_t irq_op;
	uchar_t vector;
	int rc;

	irq_op.irq = irq;

	if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
	    != 0)
		panic("Hypervisor alloc vector failed err: %d", -rc);
	vector = irq_op.vector;
	/*
	 * No need to worry about vector colliding with our reserved vectors
	 * e.g. T_FASTTRAP, xen can differentiate between hardware and software
	 * generated traps and handle them properly.
	 */
	apic_vector_to_irq[vector] = (uchar_t)irq;
	return (vector);
}

/* Mark vector as not being used by any irq */
void
apic_free_vector(uchar_t vector)
{
	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
}

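/*
 * Unlike the bare-metal pcplusmp, which picks a free vector out of its
 * own tables, apic_allocate_vector() above delegates the choice to the
 * hypervisor via PHYSDEVOP_alloc_irq_vector and only records the result
 * in apic_vector_to_irq[].
 */
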
/*
 * This function returns the number of vectors available for the given
 * priority.  dip is not used at this moment.  If we really don't need
 * that, it will be removed.  Since priority is not limited by hardware
 * when running on the hypervisor, we simply return the maximum number
 * of available contiguous vectors.
 */
/*ARGSUSED*/
int
apic_navail_vector(dev_info_t *dip, int pri)
{
	int lowest, highest, i, navail, count;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
	    (void *)dip, pri));

	highest = APIC_MAX_VECTOR;
	lowest = APIC_BASE_VECT;
	navail = count = 0;

	/* It has to be contiguous */
	for (i = lowest; i < highest; i++) {
		count = 0;
		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
		    (i < highest)) {
			count++;
			i++;
		}
		if (count > navail)
			navail = count;
	}
	return (navail);
}

static physdev_manage_pci_t *managed_devlist;
static int mdev_cnt;
static int mdev_size = 128;
static uchar_t msi_vector_to_pirq[APIC_MAX_VECTOR + 1];

/*
 * Add devfn on given bus to devices managed by hypervisor
 */
static int
xen_manage_device(uint8_t bus, uint8_t devfn)
{
	physdev_manage_pci_t manage_pci, *newlist;
	int rc, i, oldsize;

	/*
	 * Check if bus/devfn already managed.  If so just return success.
	 */
	if (managed_devlist == NULL) {
		managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) *
		    mdev_size, KM_NOSLEEP);
		if (managed_devlist == NULL) {
			cmn_err(CE_WARN,
			    "Can't alloc space for managed device list");
			return (0);
		}
	}
	for (i = 0; i < mdev_cnt; i++) {
		if (managed_devlist[i].bus == bus &&
		    managed_devlist[i].devfn == devfn)
			return (1); /* device already managed */
	}
	manage_pci.bus = bus;
	manage_pci.devfn = devfn;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
	if (rc < 0) {
		cmn_err(CE_WARN,
		    "hypervisor add pci device call failed bus:0x%x"
		    " devfn:0x%x", bus, devfn);
		return (0);
	}
	/*
	 * Add device to the managed device list
	 */
	if (i == mdev_size) {
		/*
		 * grow the managed device list
		 */
		oldsize = mdev_size * sizeof (physdev_manage_pci_t);
		mdev_size *= 2;
		newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size,
		    KM_NOSLEEP);
		if (newlist == NULL) {
			cmn_err(CE_WARN, "Can't grow managed device list");
			return (0);
		}
		bcopy(managed_devlist, newlist, oldsize);
		kmem_free(managed_devlist, oldsize);
		managed_devlist = newlist;
	}
	managed_devlist[i].bus = bus;
	managed_devlist[i].devfn = devfn;
	mdev_cnt++;
	return (1);
}

/*
 * allocate an apic irq struct for an MSI interrupt
 */
static int
msi_allocate_irq(int irq)
{
	apic_irq_t *irqptr = apic_irq_table[irq];

	if (irqptr == NULL) {
		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
		if (irqptr == NULL) {
			cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ");
			return (-1);
		}
		apic_irq_table[irq] = irqptr;
	} else {
		if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0)
			irqptr->airq_mps_intr_index = FREE_INDEX;
		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
			cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use");
			return (-1);
		}
	}
	irqptr->airq_mps_intr_index = FREE_INDEX;
	return (irq);
}

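/*
 * The vector assigned during PHYSDEVOP_map_pirq shows up in the
 * device's MSI data register, which is presumably why
 * xpv_psm_get_msi_vector() below reads the vector back out of config
 * space (or the MSI-X table) instead of allocating one itself.
 */
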
/*
 * read MSI/MSIX vector out of config space
 */
static uchar_t
xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry)
{
	uint64_t msi_data = 0;
	int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip);
	ushort_t msi_ctrl;
	uchar_t vector;

	ASSERT((handle != NULL) && (cap_ptr != 0));
	vector = 0;
	if (type == DDI_INTR_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		/*
		 * Get vector
		 */
		if (msi_ctrl & PCI_MSI_64BIT_MASK) {
			msi_data = pci_config_get16(handle,
			    cap_ptr + PCI_MSI_64BIT_DATA);
		} else {
			msi_data = pci_config_get16(handle,
			    cap_ptr + PCI_MSI_32BIT_DATA);
		}
		vector = (msi_data & 0xff) + entry;
	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t off;
		ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);

		/* Offset into the given entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (entry * PCI_MSIX_VECTOR_SIZE);

		msi_data = ddi_get32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET));
		vector = msi_data & 0xff;
	}
	return (vector);
}


static void
get_busdevfn(dev_info_t *dip, int *busp, int *devfnp)
{
	pci_regspec_t *regspec;
	int reglen;

	/*
	 * Get device reg spec, first word has PCI bus and
	 * device/function info we need.
	 */
	if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&regspec, &reglen) != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "get_busdevfn() failed to get regspec.");
		return;
	}
	/*
	 * get PCI bus # from reg spec for device
	 */
	*busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi);
	/*
	 * get combined device/function from reg spec for device.
	 */
	*devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >>
	    PCI_REG_FUNC_SHIFT;

	kmem_free(regspec, reglen);
}

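/*
 * Worked example (made-up device): a "reg" property for bus 0x1f,
 * device 0x3, function 0x1 yields *busp = 0x1f and
 * *devfnp = (0x3 << 3) | 0x1 = 0x19, the bus/devfn pair in the form
 * PHYSDEVOP_map_pirq expects.
 */
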
/*
 * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
 */
int
apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior)
{
	int rcount, i, rc, irqno;
	uchar_t vector, cpu;
	major_t major;
	apic_irq_t *irqptr;
	physdev_map_pirq_t map_irq;
	int busnum, devfn;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
	    "inum=0x%x pri=0x%x count=0x%x behavior=%d\n",
	    (void *)dip, inum, pri, count, behavior));

	if (count > 1) {
		if (behavior == DDI_INTR_ALLOC_STRICT &&
		    apic_multi_msi_enable == 0)
			return (0);
		if (apic_multi_msi_enable == 0)
			count = 1;
	}

	if ((rcount = apic_navail_vector(dip, pri)) > count)
		rcount = count;
	else if (rcount == 0 || (rcount < count &&
	    behavior == DDI_INTR_ALLOC_STRICT))
		return (0);

	/* if not ISP2, then round it down */
	if (!ISP2(rcount))
		rcount = 1 << (highbit(rcount) - 1);

	/*
	 * get PCI bus # and devfn from reg spec for device
	 */
	get_busdevfn(dip, &busnum, &devfn);

	/*
	 * Tell xen about this pci device
	 */
	if (!xen_manage_device(busnum, devfn))
		return (0);

	mutex_enter(&airq_mutex);

	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
	for (i = 0; i < rcount; i++) {
		/*
		 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq
		 */
		map_irq.domid = DOMID_SELF;
		map_irq.type = MAP_PIRQ_TYPE_MSI;
		map_irq.index = -rcount; /* hypervisor auto allocates vectors */
		map_irq.pirq = -1;
		map_irq.bus = busnum;
		map_irq.devfn = devfn;
		map_irq.entry_nr = i;
		map_irq.table_base = 0;
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
		irqno = map_irq.pirq;
		if (rc < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
			return (i);
		}
		if (irqno < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_NOTE,
			    "!hypervisor not configured for MSI support");
			xen_support_msi = -1;
			return (0);
		}

		/*
		 * Find out what vector the hypervisor assigned
		 */
		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, i);

		if (msi_allocate_irq(irqno) < 0) {
			mutex_exit(&airq_mutex);
			return (i);
		}
		apic_max_device_irq = max(irqno, apic_max_device_irq);
		apic_min_device_irq = min(irqno, apic_min_device_irq);
		irqptr = apic_irq_table[irqno];
		ASSERT(irqptr != NULL);
#ifdef DEBUG
		if (apic_vector_to_irq[vector] != APIC_RESV_IRQ)
			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
#endif
		apic_vector_to_irq[vector] = (uchar_t)irqno;
		msi_vector_to_pirq[vector] = (uchar_t)irqno;

		irqptr->airq_vector = vector;
		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
		irqptr->airq_intin_no = (uchar_t)rcount;
		irqptr->airq_ipl = pri;
		irqptr->airq_origirq = (uchar_t)(inum + i);
		irqptr->airq_share_id = 0;
		irqptr->airq_mps_intr_index = MSI_INDEX;
		irqptr->airq_dip = dip;
		irqptr->airq_major = major;
		if (i == 0) /* they all bind to the same cpu */
			cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno);
		else
			irqptr->airq_cpu = cpu;
		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
		    (void *)irqptr->airq_dip, irqptr->airq_vector,
		    irqptr->airq_origirq, pri));
	}
	mutex_exit(&airq_mutex);
	return (rcount);
}

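/*
 * Return-value convention above: the full rcount on success, 0 when
 * nothing could be allocated (or the hypervisor lacks MSI support), and
 * the partial count i if a map or irq allocation fails part way
 * through, so the caller can release what was granted.
 */
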
/*
 * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
 */
int
apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior)
{
	int rcount, i, rc;
	major_t major;
	physdev_map_pirq_t map_irq;
	int busnum, devfn;
	ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);
	uint64_t table_base;
	pfn_t pfnum;

	if (msix_p == NULL) {
		msix_p = pci_msix_init(dip);
		if (msix_p != NULL) {
			i_ddi_set_msix(dip, msix_p);
		} else {
			cmn_err(CE_WARN, "apic_alloc_msix_vectors()"
			    " msix_init failed");
			return (0);
		}
	}
	/*
	 * Hypervisor wants PCI config space address of msix table base
	 */
	pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) &
	    ~PFN_IS_FOREIGN_MFN;
	table_base = (uint64_t)((pfnum << PAGESHIFT) - msix_p->msix_tbl_offset |
	    ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET));
	/*
	 * get PCI bus # and devfn from reg spec for device
	 */
	get_busdevfn(dip, &busnum, &devfn);

	/*
	 * Tell xen about this pci device
	 */
	if (!xen_manage_device(busnum, devfn))
		return (0);

	mutex_enter(&airq_mutex);

	if ((rcount = apic_navail_vector(dip, pri)) > count)
		rcount = count;
	else if (rcount == 0 || (rcount < count &&
	    behavior == DDI_INTR_ALLOC_STRICT)) {
		rcount = 0;
		goto out;
	}

	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
	for (i = 0; i < rcount; i++) {
		int irqno;
		uchar_t vector;
		apic_irq_t *irqptr;

		/*
		 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq
		 */
		map_irq.domid = DOMID_SELF;
		map_irq.type = MAP_PIRQ_TYPE_MSI;
		map_irq.index = -1; /* hypervisor auto allocates vector */
		map_irq.pirq = -1;
		map_irq.bus = busnum;
		map_irq.devfn = devfn;
		map_irq.entry_nr = i;
		map_irq.table_base = table_base;
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
		irqno = map_irq.pirq;
		if (rc < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
			return (i);
		}
		if (irqno < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_NOTE,
			    "!hypervisor not configured for MSI support");
			xen_support_msi = -1;
			return (0);
		}
		/*
		 * Find out what vector the hypervisor assigned
		 */
		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i);

		if (msi_allocate_irq(irqno) < 0) {
			mutex_exit(&airq_mutex);
			return (i);
		}
		apic_vector_to_irq[vector] = (uchar_t)irqno;
		msi_vector_to_pirq[vector] = (uchar_t)irqno;
		apic_max_device_irq = max(irqno, apic_max_device_irq);
		apic_min_device_irq = min(irqno, apic_min_device_irq);
		irqptr = apic_irq_table[irqno];
		ASSERT(irqptr != NULL);
		irqptr->airq_vector = (uchar_t)vector;
		irqptr->airq_ipl = pri;
		irqptr->airq_origirq = (uchar_t)(inum + i);
		irqptr->airq_share_id = 0;
		irqptr->airq_mps_intr_index = MSIX_INDEX;
		irqptr->airq_dip = dip;
		irqptr->airq_major = major;
		irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */
	}
out:
	mutex_exit(&airq_mutex);
	return (rcount);
}

/*
 * This finds the apic_irq_t associated with the dip, ispec and type.
 * The entry should already have been freed, but it cannot have been
 * reused yet: we have not unmapped the pirq, so the hypervisor cannot
 * have reassigned it.
 */
static apic_irq_t *
msi_find_irq(dev_info_t *dip, struct intrspec *ispec)
{
	apic_irq_t *irqp;
	int i;

	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
		if ((irqp = apic_irq_table[i]) == NULL)
			continue;
		if ((irqp->airq_dip == dip) &&
		    (irqp->airq_origirq == ispec->intrspec_vec) &&
		    (irqp->airq_ipl == ispec->intrspec_pri)) {
			return (irqp);
		}
	}
	return (NULL);
}

void
apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
{
	int i, rc;
	physdev_unmap_pirq_t unmap_pirq;
	apic_irq_t *irqptr;
	struct intrspec ispec;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
	    "count: %x pri: %x type: %x\n",
	    (void *)dip, inum, count, pri, type));

	/* for MSI/X only */
	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
		return;

	for (i = 0; i < count; i++) {
		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
		    "pri=0x%x count=0x%x\n", inum, pri, count));
		ispec.intrspec_vec = inum + i;
		ispec.intrspec_pri = pri;
		if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) {
			cmn_err(CE_WARN,
			    "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x",
			    ddi_get_name(dip), ddi_get_name_addr(dip),
			    (void *)dip, inum + i, pri);
			continue;
		}
		/*
		 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq
		 */
		unmap_pirq.domid = DOMID_SELF;
		unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector];
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq);
		if (rc < 0) {
			cmn_err(CE_WARN, "unmap pirq failed");
			return;
		}
		irqptr->airq_mps_intr_index = FREE_INDEX;
		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
	}
}

/*
 * The hypervisor doesn't permit access to local apics directly
 */
/* ARGSUSED */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	/*
	 * Return a pointer to a memory area to fake out the
	 * probe code that wants to read apic registers.
	 * The dummy values will end up being ignored by xen
	 * later on when they are used anyway.
	 */
	xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS;
	return (xen_psm_dummy_apic);
}

/* ARGSUSED */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	/*
	 * Return non-null here to fake out configure code that calls this.
	 * The i86xpv platform will not reference through the returned value.
	 */
	return ((uint32_t *)0x1);
}

/* ARGSUSED */
void
mapout_apic(caddr_t addr, size_t len)
{
}

/* ARGSUSED */
void
mapout_ioapic(caddr_t addr, size_t len)
{
}

uint32_t
ioapic_read(int apic_ix, uint32_t reg)
{
	physdev_apic_t apic;

	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
	apic.reg = reg;
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic))
		panic("read ioapic %d reg %d failed", apic_ix, reg);
	return (apic.value);
}

void
ioapic_write(int apic_ix, uint32_t reg, uint32_t value)
{
	physdev_apic_t apic;

	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
	apic.reg = reg;
	apic.value = value;
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
		panic("write ioapic %d reg %d failed", apic_ix, reg);
}

/*
 * This function was added as part of x2APIC support in pcplusmp.
 */
void
ioapic_write_eoi(int apic_ix, uint32_t value)
{
	physdev_apic_t apic;

	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
	apic.reg = APIC_IO_EOI;
	apic.value = value;
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
		panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix);
}

/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
x2apic_update_psm(void)
{
}

/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
apic_ret(void)
{
}

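/*
 * In the rebind path below the ioapic programming done by apic_rebind()
 * is only half the story on xVM: the target cpu written into the RTE is
 * advisory (xen substitutes its own choice), and it is the event
 * channel set up via ec_setup_pirq() that actually routes the interrupt
 * to the chosen vcpu set.
 */
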
/*
 * Call rebind to do the actual programming.
 */
int
apic_setup_io_intr(void *p, int irq, boolean_t deferred)
{
	apic_irq_t *irqptr;
	struct ioapic_reprogram_data *drep = NULL;
	int rv, cpu;
	cpuset_t cpus;

	if (deferred) {
		drep = (struct ioapic_reprogram_data *)p;
		ASSERT(drep != NULL);
		irqptr = drep->irqp;
	} else {
		irqptr = (apic_irq_t *)p;
	}
	ASSERT(irqptr != NULL);
	/*
	 * Set cpu based on xen's idea of online cpus, not apic tables.
	 * Note that xen ignores (sets to its own preferred value) the
	 * target cpu field when programming the ioapic anyway.
	 */
	if (irqptr->airq_mps_intr_index == MSI_INDEX)
		cpu = irqptr->airq_cpu;	/* MSI cpus are already set */
	else {
		cpu = xen_psm_bind_intr(irq);
		irqptr->airq_cpu = cpu;
	}
	if (cpu == IRQ_UNBOUND) {
		CPUSET_ZERO(cpus);
		CPUSET_OR(cpus, xen_psm_cpus_online);
	} else {
		CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
	}
	rv = apic_rebind(irqptr, cpu, drep);
	if (rv) {
		/* CPU is not up or interrupt is disabled.  Fall back to 0 */
		cpu = 0;
		irqptr->airq_cpu = cpu;
		rv = apic_rebind(irqptr, cpu, drep);
	}
	/*
	 * If the rebind was successful, bind the irq to an event channel
	 */
	if (rv == 0) {
		ec_setup_pirq(irq, irqptr->airq_ipl, &cpus);
		CPUSET_FIND(cpus, cpu);
		apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND;
	}
	return (rv);
}

/*
 * Allocate a new vector for the given irq
 */
/* ARGSUSED */
uchar_t
apic_modify_vector(uchar_t vector, int irq)
{
	return (apic_allocate_vector(0, irq, 0));
}

/*
 * The rest of the file is just generic psm module boilerplate
 */

static struct psm_ops xen_psm_ops = {
	xen_psm_probe,				/* psm_probe */

	xen_psm_softinit,			/* psm_init */
	xen_psm_picinit,			/* psm_picinit */
	xen_psm_intr_enter,			/* psm_intr_enter */
	xen_psm_intr_exit,			/* psm_intr_exit */
	xen_psm_setspl,				/* psm_setspl */
	xen_psm_addspl,				/* psm_addspl */
	xen_psm_delspl,				/* psm_delspl */
	xen_psm_disable_intr,			/* psm_disable_intr */
	xen_psm_enable_intr,			/* psm_enable_intr */
	(int (*)(int))NULL,			/* psm_softlvl_to_irq */
	(void (*)(int))NULL,			/* psm_set_softintr */
	(void (*)(processorid_t))NULL,		/* psm_set_idlecpu */
	(void (*)(processorid_t))NULL,		/* psm_unset_idlecpu */

	xen_psm_clkinit,			/* psm_clkinit */
	xen_psm_get_clockirq,			/* psm_get_clockirq */
	xen_psm_hrtimeinit,			/* psm_hrtimeinit */
	xpv_gethrtime,				/* psm_gethrtime */

	xen_psm_get_next_processorid,		/* psm_get_next_processorid */
	xen_psm_cpu_start,			/* psm_cpu_start */
	xen_psm_post_cpu_start,			/* psm_post_cpu_start */
	xen_psm_shutdown,			/* psm_shutdown */
	xen_psm_get_ipivect,			/* psm_get_ipivect */
	xen_psm_send_ipi,			/* psm_send_ipi */

	xen_psm_translate_irq,			/* psm_translate_irq */

	(void (*)(int, char *))NULL,		/* psm_notify_error */
	(void (*)(int msg))NULL,		/* psm_notify_func */
	xen_psm_timer_reprogram,		/* psm_timer_reprogram */
	xen_psm_timer_enable,			/* psm_timer_enable */
	xen_psm_timer_disable,			/* psm_timer_disable */
	(void (*)(void *arg))NULL,		/* psm_post_cyclic_setup */
	(void (*)(int, int))NULL,		/* psm_preshutdown */
	xen_intr_ops,			/* Advanced DDI Interrupt framework */
	(int (*)(psm_state_request_t *))NULL,	/* psm_state */
	(int (*)(psm_cpu_request_t *))NULL,	/* psm_cpu_ops */

	(int (*)(void))NULL,			/* psm_get_pir_ipivect */
	(void (*)(processorid_t))NULL,		/* psm_send_pir_ipi */
	(void (*)(processorid_t, boolean_t))NULL	/* psm_cmci_setup */
};

static struct psm_info xen_psm_info = {
	PSM_INFO_VER01_5,	/* version */
	PSM_OWN_EXCLUSIVE,	/* ownership */
	&xen_psm_ops,		/* operation */
	"xVM_psm",		/* machine name */
	"platform module"	/* machine description */
};

static void *xen_psm_hdlp;

int
_init(void)
{
	return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info));
}

int
_fini(void)
{
	return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info));
}

int
_info(struct modinfo *modinfop)
{
	return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop));
}