/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#define	PSMI_1_7

#include <sys/mutex.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/machlock.h>
#include <sys/smp_impldefs.h>
#include <sys/uadmin.h>
#include <sys/promif.h>
#include <sys/psm.h>
#include <sys/psm_common.h>
#include <sys/atomic.h>
#include <sys/apic.h>
#include <sys/archsystm.h>
#include <sys/mach_intr.h>
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/modctl.h>
#include <sys/trap.h>
#include <sys/panic.h>
#include <sys/sysmacros.h>
#include <sys/pci_intr_lib.h>
#include <vm/hat_i86.h>

#include <xen/public/vcpu.h>
#include <xen/public/physdev.h>


/*
 * Global Data
 */

int xen_psm_verbose = 0;

/* As of now we don't support x2apic in xVM */
volatile uint32_t *apicadr = NULL;	/* dummy, so common code will link */
int apic_error = 0;
int apic_verbose = 0;
cpuset_t apic_cpumask;
int apic_forceload = 0;
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
};
uchar_t apic_ipltopri[MAXIPL + 1];
uchar_t apic_ipls[APIC_AVAIL_VECTOR];
uint_t apic_picinit_called;
apic_cpus_info_t *apic_cpus;
int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
/* used to make sure only one cpu handles the nmi */
static lock_t xen_psm_nmi_lock;
int xen_psm_kmdb_on_nmi = 0;	/* 0 - no, 1 - yes enter kmdb */
int xen_psm_panic_on_nmi = 0;
int xen_psm_num_nmis = 0;

cpuset_t xen_psm_cpus_online;	/* online cpus */
int xen_psm_ncpus = 1;		/* cpu count */
int xen_psm_next_bind_cpu;	/* next cpu to bind an interrupt to */

int xen_support_msi = 0;

static int xen_clock_irq = INVALID_IRQ;

/* flag definitions for xen_psm_verbose */
#define	XEN_PSM_VERBOSE_IRQ_FLAG		0x00000001
#define	XEN_PSM_VERBOSE_POWEROFF_FLAG		0x00000002
#define	XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000004

#define	XEN_PSM_VERBOSE_IRQ(fmt) \
	if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \
		cmn_err fmt;

#define	XEN_PSM_VERBOSE_POWEROFF(fmt) \
	if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \
		prom_printf fmt;
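
/*
 * Example usage (illustrative, not from the original source): with
 * xen_psm_verbose set to XEN_PSM_VERBOSE_IRQ_FLAG, e.g. via /etc/system,
 *
 *	XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irq %d bound\n", irq));
 *
 * expands to a cmn_err() call guarded by the flag test, so the message
 * is only logged when IRQ tracing has been requested.
 */
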
/*
 * Dummy apic array to point common routines at that want to do some apic
 * manipulation. Xen doesn't allow guest apic access so we point at these
 * memory locations to fake out those who want to do apic fiddling.
 */
uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1];

static struct psm_info xen_psm_info;
static void xen_psm_setspl(int);

int
apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior);
int
apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior);

/*
 * Local support routines
 */

/*
 * Select vcpu to bind xen virtual device interrupt to.
 */
/*ARGSUSED*/
int
xen_psm_bind_intr(int irq)
{
	int bind_cpu;
	apic_irq_t *irqptr;

	bind_cpu = IRQ_UNBOUND;
	if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY)
		return (bind_cpu);
	if (irq <= APIC_MAX_VECTOR)
		irqptr = apic_irq_table[irq];
	else
		irqptr = NULL;
	if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND))
		bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND;
	if (bind_cpu != IRQ_UNBOUND) {
		if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu))
			bind_cpu = 0;
		goto done;
	}
	if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
		do {
			bind_cpu = xen_psm_next_bind_cpu++;
			if (xen_psm_next_bind_cpu >= xen_psm_ncpus)
				xen_psm_next_bind_cpu = 0;
		} while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu));
	} else {
		bind_cpu = 0;
	}
done:
	return (bind_cpu);
}

/*
 * Autoconfiguration Routines
 */

static int
xen_psm_probe(void)
{
	int ret = PSM_SUCCESS;

	if (DOMAIN_IS_INITDOMAIN(xen_info))
		ret = apic_probe_common(xen_psm_info.p_mach_idstring);
	return (ret);
}

static void
xen_psm_softinit(void)
{
	/* LINTED logical expression always true: op "||" */
	ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t));
	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0);
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		apic_init_common();
	}
}

#define	XEN_NSEC_PER_TICK	10 /* XXX - assume we have a 100 MHz clock */

/*ARGSUSED*/
static int
xen_psm_clkinit(int hertz)
{
	extern enum tod_fault_type tod_fault(enum tod_fault_type, int);
	extern int dosynctodr;

	/*
	 * domU cannot set the TOD hardware, fault the TOD clock now to
	 * indicate that and turn off attempts to sync TOD hardware
	 * with the hires timer.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		mutex_enter(&tod_lock);
		(void) tod_fault(TOD_RDONLY, 0);
		dosynctodr = 0;
		mutex_exit(&tod_lock);
	}
	/*
	 * The hypervisor provides a timer based on the local APIC timer.
	 * The interface supports requests of nanosecond resolution.
	 * A common frequency of the apic clock is 100 MHz which
	 * gives a resolution of 10 nsec per tick. What we would really like
	 * is a way to get the ns per tick value from xen.
	 * XXPV - This is an assumption that needs checking and may change
	 */
	return (XEN_NSEC_PER_TICK);
}

static void
xen_psm_hrtimeinit(void)
{
	extern int gethrtime_hires;
	gethrtime_hires = 1;
}

/* xen_psm NMI handler */
/*ARGSUSED*/
static void
xen_psm_nmi_intr(caddr_t arg, struct regs *rp)
{
	xen_psm_num_nmis++;

	if (!lock_try(&xen_psm_nmi_lock))
		return;

	if (xen_psm_kmdb_on_nmi && psm_debugger()) {
		debug_enter("NMI received: entering kmdb\n");
	} else if (xen_psm_panic_on_nmi) {
		/* Keep panic from entering kmdb. */
		nopanicdebug = 1;
		panic("NMI received\n");
	} else {
		/*
		 * prom_printf is the best shot we have of something which is
		 * problem free from high level/NMI type of interrupts
		 */
		prom_printf("NMI received\n");
	}

	lock_clear(&xen_psm_nmi_lock);
}
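
/*
 * Tuning sketch (illustrative): setting xen_psm_kmdb_on_nmi to 1 drops
 * into kmdb on NMI when a debugger is loaded, and xen_psm_panic_on_nmi
 * forces a panic instead; with both left at 0 the handler above only
 * logs through prom_printf(), which is safe at NMI level. The
 * xen_psm_nmi_lock spin lock guarantees a single cpu reports each NMI.
 */
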
static void
xen_psm_picinit()
{
	int cpu, irqno;
	cpuset_t cpus;

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* set a flag so we know we have run xen_psm_picinit() */
		apic_picinit_called = 1;
		LOCK_INIT_CLEAR(&apic_ioapic_lock);

		/* XXPV - do we need to do this? */
		picsetup();	/* initialise the 8259 */

		/* enable apic mode if imcr present */
		/* XXPV - do we need to do this either? */
		if (apic_imcrp) {
			outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
			outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
		}

		ioapic_init_intr(IOAPIC_NOMASK);
		/*
		 * We never called xen_psm_addspl() when the SCI
		 * interrupt was added because that happened before the
		 * PSM module was loaded.  Fix that up here by doing
		 * any missed operations (e.g. bind to CPU)
		 */
		if ((irqno = apic_sci_vect) > 0) {
			if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
				CPUSET_ZERO(cpus);
				CPUSET_OR(cpus, xen_psm_cpus_online);
			} else {
				CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
			}
			ec_set_irq_affinity(irqno, cpus);
			apic_irq_table[irqno]->airq_temp_cpu =
			    (uchar_t)(cpu & ~IRQ_USER_BOUND);
			ec_enable_irq(irqno);
		}
	}

	/* add nmi handler - least priority nmi handler */
	LOCK_INIT_CLEAR(&xen_psm_nmi_lock);

	if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr,
	    "xVM_psm NMI handler", (caddr_t)NULL))
		cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler");
}


/*
 * generates an interprocessor interrupt to another CPU
 */
static void
xen_psm_send_ipi(int cpun, int ipl)
{
	ulong_t flag = intr_clear();

	ec_send_ipi(ipl, cpun);
	intr_restore(flag);
}

/*ARGSUSED*/
static int
xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int cpu, ret;
	cpuset_t cpus;

	/*
	 * We are called at splhi() so we can't call anything that might end
	 * up trying to context switch.
	 */
	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
	    DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq()
		 */
		ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
	} else {
		/*
		 * Set priority/affinity/enable for non PIRQs
		 */
		ret = ec_set_irq_priority(irqno, ipl);
		ASSERT(ret == 0);
		if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
			CPUSET_ZERO(cpus);
			CPUSET_OR(cpus, xen_psm_cpus_online);
		} else {
			CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
		}
		ec_set_irq_affinity(irqno, cpus);
		ec_enable_irq(irqno);
	}
	return (ret);
}

/*
 * Acquire ownership of this irq on this cpu
 */
void
xen_psm_acquire_irq(int irq)
{
	ulong_t flags;
	int cpuid;

	/*
	 * If the irq is currently being serviced by another cpu
	 * we busy-wait for the other cpu to finish.  Take any
	 * pending interrupts before retrying.
	 */
	do {
		flags = intr_clear();
		cpuid = ec_block_irq(irq);
		intr_restore(flags);
	} while (cpuid != CPU->cpu_id);
}
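
/*
 * Rough sketch of the loop above: ec_block_irq() masks the irq's event
 * channel and reports which cpu currently owns it; interrupts are
 * re-enabled between attempts so pending work can drain, and the loop
 * exits once ownership has passed to the calling cpu.
 */
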
362 */ 363 do { 364 flags = intr_clear(); 365 cpuid = ec_block_irq(irq); 366 intr_restore(flags); 367 } while (cpuid != CPU->cpu_id); 368 } 369 370 /*ARGSUSED*/ 371 static int 372 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 373 { 374 apic_irq_t *irqptr; 375 int err = PSM_SUCCESS; 376 377 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 378 DOMAIN_IS_INITDOMAIN(xen_info)) { 379 irqptr = apic_irq_table[irqno]; 380 /* 381 * unbind if no more sharers of this irq/evtchn 382 */ 383 if (irqptr->airq_share == 1) { 384 xen_psm_acquire_irq(irqno); 385 ec_unbind_irq(irqno); 386 } 387 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 388 /* 389 * If still in use reset priority 390 */ 391 if (!err && irqptr->airq_share != 0) { 392 err = ec_set_irq_priority(irqno, max_ipl); 393 return (err); 394 } 395 } else { 396 xen_psm_acquire_irq(irqno); 397 ec_unbind_irq(irqno); 398 } 399 return (err); 400 } 401 402 static processorid_t 403 xen_psm_get_next_processorid(processorid_t id) 404 { 405 if (id == -1) 406 return (0); 407 408 for (id++; id < NCPU; id++) { 409 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 410 case 0: /* yeah, that one's there */ 411 return (id); 412 default: 413 case X_EINVAL: /* out of range */ 414 return (-1); 415 case X_ENOENT: /* not present in the domain */ 416 /* 417 * It's not clear that we -need- to keep looking 418 * at this point, if, e.g., we can guarantee 419 * the hypervisor always keeps a contiguous range 420 * of vcpus around this is equivalent to "out of range". 421 * 422 * But it would be sad to miss a vcpu we're 423 * supposed to be using .. 424 */ 425 break; 426 } 427 } 428 429 return (-1); 430 } 431 432 /* 433 * XXPV - undo the start cpu op change; return to ignoring this value 434 * - also tweak error handling in main startup loop 435 */ 436 /*ARGSUSED*/ 437 static int 438 xen_psm_cpu_start(processorid_t id, caddr_t arg) 439 { 440 int ret; 441 442 ASSERT(id > 0); 443 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 444 ec_bind_cpu_ipis(id); 445 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 446 if ((ret = xen_vcpu_up(id)) == 0) 447 xen_psm_ncpus++; 448 else 449 ret = EINVAL; 450 return (ret); 451 } 452 453 /* 454 * Allocate an irq for inter cpu signaling 455 */ 456 /*ARGSUSED*/ 457 static int 458 xen_psm_get_ipivect(int ipl, int type) 459 { 460 return (ec_bind_ipi_to_irq(ipl, 0)); 461 } 462 463 /*ARGSUSED*/ 464 static int 465 xen_psm_get_clockirq(int ipl) 466 { 467 if (xen_clock_irq != INVALID_IRQ) 468 return (xen_clock_irq); 469 470 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 471 return (xen_clock_irq); 472 } 473 474 /*ARGSUSED*/ 475 static void 476 xen_psm_shutdown(int cmd, int fcn) 477 { 478 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 479 480 switch (cmd) { 481 case A_SHUTDOWN: 482 switch (fcn) { 483 case AD_BOOT: 484 case AD_IBOOT: 485 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 486 break; 487 case AD_POWEROFF: 488 /* fall through if domU or if poweroff fails */ 489 if (DOMAIN_IS_INITDOMAIN(xen_info)) 490 if (apic_enable_acpi) 491 (void) acpi_poweroff(); 492 /* FALLTHRU */ 493 case AD_HALT: 494 default: 495 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 496 break; 497 } 498 break; 499 case A_REBOOT: 500 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 501 break; 502 default: 503 return; 504 } 505 } 506 507 508 static int 509 xen_psm_translate_irq(dev_info_t *dip, int irqno) 510 { 511 if (dip == NULL) { 512 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 513 " dip = NULL\n", irqno)); 514 return (irqno); 515 } 516 
static int
xen_psm_translate_irq(dev_info_t *dip, int irqno)
{
	if (dip == NULL) {
		XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d"
		    " dip = NULL\n", irqno));
		return (irqno);
	}

	return (irqno);
}

/*
 * xen_psm_intr_enter() acks the event that triggered the interrupt and
 * returns the new priority level.
 */
/*ARGSUSED*/
static int
xen_psm_intr_enter(int ipl, int *vector)
{
	int newipl;
	uint_t intno;
	cpu_t *cpu = CPU;

	intno = (*vector);

	ASSERT(intno < NR_IRQS);
	ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);

	if (!ec_is_edge_pirq(intno))
		ec_clear_irq(intno);

	newipl = autovect[intno].avh_hi_pri;
	if (newipl == 0) {
		/*
		 * (newipl == 0) means we have no service routines for this
		 * vector.  We will treat this as a spurious interrupt.
		 * We have cleared the pending bit already, clear the event
		 * mask and return a spurious interrupt.  This case can happen
		 * when an interrupt delivery is racing with the removal
		 * of the service routine for that interrupt.
		 */
		ec_unmask_irq(intno);
		newipl = -1;	/* flag spurious interrupt */
	} else if (newipl <= cpu->cpu_pri) {
		/*
		 * (newipl <= cpu->cpu_pri) means that we must be trying to
		 * service a vector that was shared with a higher priority
		 * isr.  The higher priority handler has been removed and
		 * we need to service this int.  We can't return a lower
		 * priority than current cpu priority.  Just synthesize a
		 * priority to return that should be acceptable.
		 * It should never happen that we synthesize a priority that
		 * moves us from low-priority to high-priority and would make
		 * us incorrectly run on the high priority stack.
		 */
		newipl = cpu->cpu_pri + 1;	/* synthetic priority */
		ASSERT(newipl != LOCK_LEVEL + 1);
	}
	return (newipl);
}


/*
 * xen_psm_intr_exit() restores the old interrupt
 * priority level after processing an interrupt.
 * It is called with interrupts disabled, and does not enable interrupts.
 */
/* ARGSUSED */
static void
xen_psm_intr_exit(int ipl, int vector)
{
	ec_try_unmask_irq(vector);
	xen_psm_setspl(ipl);
}

intr_exit_fn_t
psm_intr_exit_fn(void)
{
	return (xen_psm_intr_exit);
}

/*
 * Check if new ipl level allows delivery of previously unserviced events
 */
static void
xen_psm_setspl(int ipl)
{
	struct cpu *cpu = CPU;
	volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
	uint16_t pending;

	ASSERT(vci->evtchn_upcall_mask != 0);

	/*
	 * If new ipl level will enable any pending interrupts, setup so the
	 * upcoming sti will cause us to get an upcall.
	 */
	pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1);
	if (pending) {
		int i;
		ulong_t pending_sels = 0;
		volatile ulong_t *selp;
		struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;

		for (i = bsrw_insn(pending); i > ipl; i--)
			pending_sels |= cpe->pending_sel[i];
		ASSERT(pending_sels);
		selp = (volatile ulong_t *)&vci->evtchn_pending_sel;
		atomic_or_ulong(selp, pending_sels);
		vci->evtchn_upcall_pending = 1;
	}
}
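
/*
 * Worked example for the mask above (illustrative): with ipl == 4 and
 * events pending at levels 2 and 6, ~((1 << 5) - 1) clears bits 0-4 of
 * mcpu_intr_pending, leaving only the level-6 bit. bsrw_insn() then
 * yields 6, the selector words for levels 6 and 5 are merged into
 * evtchn_pending_sel, and an upcall is flagged for delivery at the
 * next sti.
 */
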
/*
 * This function provides external interface to the nexus for all
 * functionality related to the new DDI interrupt framework.
 *
 * Input:
 * dip     - pointer to the dev_info structure of the requested device
 * hdlp    - pointer to the internal interrupt handle structure for the
 *	     requested interrupt
 * intr_op - opcode for this call
 * result  - pointer to the integer that will hold the result to be
 *	     passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 */
int
xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
	int cap;
	int err;
	int new_priority;
	apic_irq_t *irqp;
	struct intrspec *ispec;

	DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p "
	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));

	switch (intr_op) {
	case PSM_INTR_OP_CHECK_MSI:
		/*
		 * Till PCI passthru is supported, only dom0 has MSI/MSIX
		 */
		if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
			break;
		}
		/*
		 * Check whether MSI/X is supported at the APIC level and
		 * mask off the MSI/X bits in hdlp->ih_type if not
		 * supported before return.  If MSI/X is supported,
		 * leave the ih_type unchanged and return.
		 *
		 * hdlp->ih_type passed in from the nexus has all the
		 * interrupt types supported by the device.
		 */
		if (xen_support_msi == 0) {
			/*
			 * if xen_support_msi is not set, call
			 * apic_check_msi_support() to check whether msi
			 * is supported first
			 */
			if (apic_check_msi_support() == PSM_SUCCESS)
				xen_support_msi = 1;
			else
				xen_support_msi = -1;
		}
		if (xen_support_msi == 1)
			*result = hdlp->ih_type;
		else
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
		break;
	case PSM_INTR_OP_ALLOC_VECTORS:
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		else
			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		break;
	case PSM_INTR_OP_FREE_VECTORS:
		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
		    hdlp->ih_pri, hdlp->ih_type);
		break;
	case PSM_INTR_OP_NAVAIL_VECTORS:
		/*
		 * XXPV - maybe we should make this be:
		 * min(APIC_VECTOR_PER_IPL, count of all avail vectors);
		 */
		if (DOMAIN_IS_INITDOMAIN(xen_info))
			*result = APIC_VECTOR_PER_IPL;
		else
			*result = 1;
		break;
	case PSM_INTR_OP_XLATE_VECTOR:
		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
		if (ispec->intrspec_vec >= PIRQ_BASE &&
		    ispec->intrspec_vec < NR_PIRQS &&
		    DOMAIN_IS_INITDOMAIN(xen_info)) {
			*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
		} else {
			*result = ispec->intrspec_vec;
		}
		break;
	case PSM_INTR_OP_GET_PENDING:
		/* XXPV - is this enough for dom0 or do we need to ref ioapic */
		*result = ec_pending_irq(hdlp->ih_vector);
		break;
	case PSM_INTR_OP_CLEAR_MASK:
		/* XXPV - is this enough for dom0 or do we need to set ioapic */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		ec_enable_irq(hdlp->ih_vector);
		break;
	case PSM_INTR_OP_SET_MASK:
		/* XXPV - is this enough for dom0 or do we need to set ioapic */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		ec_disable_irq(hdlp->ih_vector);
		break;
	case PSM_INTR_OP_GET_CAP:
		cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE;
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			cap |= DDI_INTR_FLAG_MASKABLE;
		*result = cap;
		break;
	case PSM_INTR_OP_GET_SHARED:
		if (DOMAIN_IS_INITDOMAIN(xen_info)) {
			if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
				return (PSM_FAILURE);
			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
			if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type))
			    == NULL)
				return (PSM_FAILURE);
			*result = (irqp->airq_share > 1) ? 1 : 0;
		} else {
			return (PSM_FAILURE);
		}
		break;
	case PSM_INTR_OP_SET_PRI:
		new_priority = *(int *)result;
		err = ec_set_irq_priority(hdlp->ih_vector, new_priority);
		if (err != 0)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_GET_INTR:
		if (!DOMAIN_IS_INITDOMAIN(xen_info))
			return (PSM_FAILURE);
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a pointer to a apic_get_intr_t.
		 */
		if (apic_get_vector_intr_info(
		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_SET_CAP:
		/* FALLTHRU */
	default:
		return (PSM_FAILURE);
	}
	return (PSM_SUCCESS);
}
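
/*
 * Typical call sequence (illustrative): the nexus first issues
 * PSM_INTR_OP_CHECK_MSI; in a domU the MSI/MSIX bits are always masked
 * out of *result, so allocation falls back to fixed interrupts, while
 * dom0 consults apic_check_msi_support() once and caches the answer in
 * xen_support_msi.
 */
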
static void
xen_psm_rebind_irq(int irq)
{
	cpuset_t ncpu;
	processorid_t newcpu;
	apic_irq_t *irqptr;

	newcpu = xen_psm_bind_intr(irq);
	if (newcpu == IRQ_UNBOUND) {
		CPUSET_ZERO(ncpu);
		CPUSET_OR(ncpu, xen_psm_cpus_online);
	} else {
		CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND);
	}
	ec_set_irq_affinity(irq, ncpu);
	if (irq <= APIC_MAX_VECTOR) {
		irqptr = apic_irq_table[irq];
		ASSERT(irqptr != NULL);
		irqptr->airq_temp_cpu = (uchar_t)newcpu;
	}
}

/*
 * Disable all device interrupts for the given cpu.
 * High priority interrupts are not disabled and will still be serviced.
 */
static int
xen_psm_disable_intr(processorid_t cpun)
{
	int irq;

	/*
	 * Can't offline VCPU 0 on this hypervisor.  There's no reason
	 * anyone would want to given that the CPUs are virtual. Also note
	 * that the hypervisor requires suspend/resume to be on VCPU 0.
	 */
	if (cpun == 0)
		return (PSM_FAILURE);

	CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun);
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!ec_irq_needs_rebind(irq, cpun))
			continue;
		xen_psm_rebind_irq(irq);
	}
	return (PSM_SUCCESS);
}

static void
xen_psm_enable_intr(processorid_t cpun)
{
	int irq;

	if (cpun == 0)
		return;

	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun);

	/*
	 * Rebalance device interrupts among online processors
	 */
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!ec_irq_rebindable(irq))
			continue;
		xen_psm_rebind_irq(irq);
	}

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
	}
}

static int
xen_psm_post_cpu_start()
{
	processorid_t cpun;

	cpun = psm_get_cpu_id();
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * Non-virtualized environments can call psm_post_cpu_start
		 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set.
		 * xen_psm_post_cpu_start() is only called from boot.
		 */
		apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
	}
	return (PSM_SUCCESS);
}
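
/*
 * Offline example (illustrative): taking vcpu 2 out of service, e.g.
 * with psradm(1M), reaches xen_psm_disable_intr(2), which removes the
 * vcpu from xen_psm_cpus_online and rebinds every irq that
 * ec_irq_needs_rebind() reports as targeting it; xen_psm_enable_intr()
 * reverses this and rebalances rebindable irqs across the online set.
 */
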
/*
 * This function will reprogram the timer.
 *
 * When in oneshot mode the argument is the absolute time in future at which to
 * generate the interrupt.
 *
 * When in periodic mode, the argument is the interval at which the
 * interrupts should be generated. There is no need to support the periodic
 * mode timer change at this time.
 *
 * Note that we must be careful to convert from hrtime to Xen system time (see
 * xpv_timestamp.c).
 */
static void
xen_psm_timer_reprogram(hrtime_t timer_req)
{
	hrtime_t now, timer_new, time_delta, xen_time;
	ulong_t flags;

	flags = intr_clear();
	/*
	 * We should be called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */

	now = xpv_gethrtime();
	xen_time = xpv_getsystime();
	if (timer_req <= now) {
		/*
		 * requested to generate an interrupt in the past
		 * generate an interrupt as soon as possible
		 */
		time_delta = XEN_NSEC_PER_TICK;
	} else
		time_delta = timer_req - now;

	timer_new = xen_time + time_delta;
	if (HYPERVISOR_set_timer_op(timer_new) != 0)
		panic("can't set hypervisor timer?");
	intr_restore(flags);
}

/*
 * This function will enable timer interrupts.
 */
static void
xen_psm_timer_enable(void)
{
	ec_unmask_irq(xen_clock_irq);
}

/*
 * This function will disable timer interrupts on the current cpu.
 */
static void
xen_psm_timer_disable(void)
{
	(void) ec_block_irq(xen_clock_irq);
	/*
	 * If the clock irq is pending on this cpu then we need to
	 * clear the pending interrupt.
	 */
	ec_unpend_irq(xen_clock_irq);
}

/*
 * The following functions are in the platform specific file so that they
 * can be different functions depending on whether we are running on
 * bare metal or a hypervisor.
 */

/*
 * Allocate a free vector for irq at ipl.
 */
/* ARGSUSED */
uchar_t
apic_allocate_vector(int ipl, int irq, int pri)
{
	physdev_irq_t irq_op;
	uchar_t vector;
	int rc;

	irq_op.irq = irq;

	if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
	    != 0)
		panic("Hypervisor alloc vector failed err: %d", -rc);
	vector = irq_op.vector;
	/*
	 * No need to worry about vector colliding with our reserved vectors
	 * e.g. T_FASTTRAP, xen can differentiate between hardware and software
	 * generated traps and handle them properly.
	 */
	apic_vector_to_irq[vector] = (uchar_t)irq;
	return (vector);
}

/* Mark vector as not being used by any irq */
void
apic_free_vector(uchar_t vector)
{
	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
}

/*
 * This function returns the no. of vectors available for the pri.
 * dip is not used at this moment. If we really don't need that,
 * it will be removed. Since priority is not limited by hardware
 * when running on the hypervisor we simply return the maximum no.
 * of available contiguous vectors.
 */
/*ARGSUSED*/
int
apic_navail_vector(dev_info_t *dip, int pri)
{
	int lowest, highest, i, navail, count;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
	    (void *)dip, pri));

	highest = APIC_MAX_VECTOR;
	lowest = APIC_BASE_VECT;
	navail = count = 0;

	/* It has to be contiguous */
	for (i = lowest; i < highest; i++) {
		count = 0;
		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
		    (i < highest)) {
			count++;
			i++;
		}
		if (count > navail)
			navail = count;
	}
	return (navail);
}
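
/*
 * Example (illustrative): if the vector table holds free runs of
 * lengths 3 and 5, the scan above reports 5; only the longest
 * contiguous run matters, since multi-MSI grants must be contiguous.
 */
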
static physdev_manage_pci_t *managed_devlist;
static int mdev_cnt;
static int mdev_size = 128;
static uchar_t msi_vector_to_pirq[APIC_MAX_VECTOR + 1];

/*
 * Add devfn on given bus to devices managed by hypervisor
 */
static int
xen_manage_device(uint8_t bus, uint8_t devfn)
{
	physdev_manage_pci_t manage_pci, *newlist;
	int rc, i, oldsize;

	/*
	 * Check if bus/devfn already managed. If so just return success.
	 */
	if (managed_devlist == NULL) {
		managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) *
		    mdev_size, KM_NOSLEEP);
		if (managed_devlist == NULL) {
			cmn_err(CE_WARN,
			    "Can't alloc space for managed device list");
			return (0);
		}
	}
	for (i = 0; i < mdev_cnt; i++) {
		if (managed_devlist[i].bus == bus &&
		    managed_devlist[i].devfn == devfn)
			return (1); /* device already managed */
	}
	manage_pci.bus = bus;
	manage_pci.devfn = devfn;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
	if (rc < 0) {
		cmn_err(CE_WARN,
		    "hypervisor add pci device call failed bus:0x%x"
		    " devfn:0x%x", bus, devfn);
		return (0);
	}
	/*
	 * Add device to the managed device list
	 */
	if (i == mdev_size) {
		/*
		 * grow the managed device list
		 */
		oldsize = mdev_size * sizeof (physdev_manage_pci_t);
		mdev_size *= 2;
		newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size,
		    KM_NOSLEEP);
		if (newlist == NULL) {
			cmn_err(CE_WARN, "Can't grow managed device list");
			return (0);
		}
		bcopy(managed_devlist, newlist, oldsize);
		kmem_free(managed_devlist, oldsize);
		managed_devlist = newlist;
	}
	managed_devlist[i].bus = bus;
	managed_devlist[i].devfn = devfn;
	mdev_cnt++;
	return (1);
}

/*
 * allocate an apic irq struct for an MSI interrupt
 */
static int
msi_allocate_irq(int irq)
{
	apic_irq_t *irqptr = apic_irq_table[irq];

	if (irqptr == NULL) {
		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
		if (irqptr == NULL) {
			cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ");
			return (-1);
		}
		apic_irq_table[irq] = irqptr;
	} else {
		if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0)
			irqptr->airq_mps_intr_index = FREE_INDEX;
		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
			cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use");
			return (-1);
		}
	}
	irqptr->airq_mps_intr_index = FREE_INDEX;
	return (irq);
}
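
/*
 * Note on the bookkeeping above: the managed list starts with room for
 * 128 devices and doubles when full; entries are never removed here,
 * as a device remains hypervisor-managed once it has been added via
 * PHYSDEVOP_manage_pci_add.
 */
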
/*
 * read MSI/MSIX vector out of config space
 */
static uchar_t
xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry)
{
	uint64_t msi_data = 0;
	int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip);
	ushort_t msi_ctrl;
	uchar_t vector;

	ASSERT((handle != NULL) && (cap_ptr != 0));
	if (type == DDI_INTR_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		/*
		 * Get vector
		 */
		if (msi_ctrl & PCI_MSI_64BIT_MASK) {
			msi_data = pci_config_get16(handle,
			    cap_ptr + PCI_MSI_64BIT_DATA);
		} else {
			msi_data = pci_config_get16(handle,
			    cap_ptr + PCI_MSI_32BIT_DATA);
		}
		vector = (msi_data & 0xff) + entry;
	} else if (type == DDI_INTR_TYPE_MSIX) {
		uintptr_t off;
		ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);

		/* Offset into the given entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (entry * PCI_MSIX_VECTOR_SIZE);

		msi_data = ddi_get32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET));
		vector = msi_data & 0xff;
	}
	return (vector);
}


static void
get_busdevfn(dev_info_t *dip, int *busp, int *devfnp)
{
	pci_regspec_t *regspec;
	int reglen;

	/*
	 * Get device reg spec, first word has PCI bus and
	 * device/function info we need.
	 */
	if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&regspec, &reglen) != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "get_busdevfn() failed to get regspec.");
		return;
	}
	/*
	 * get PCI bus # from reg spec for device
	 */
	*busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi);
	/*
	 * get combined device/function from reg spec for device.
	 */
	*devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >>
	    PCI_REG_FUNC_SHIFT;

	kmem_free(regspec, reglen);
}
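
/*
 * devfn packs PCI device and function as (dev << 3) | func; for
 * example, device 3 function 1 yields 0x19. This is the layout
 * PHYSDEVOP_manage_pci_add and PHYSDEVOP_map_pirq expect.
 */
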
/*
 * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
 */
int
apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior)
{
	int rcount, i, rc, irqno;
	uchar_t vector, cpu;
	major_t major;
	apic_irq_t *irqptr;
	physdev_map_pirq_t map_irq;
	int busnum, devfn;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
	    "inum=0x%x pri=0x%x count=0x%x behavior=%d\n",
	    (void *)dip, inum, pri, count, behavior));

	if (count > 1) {
		if (behavior == DDI_INTR_ALLOC_STRICT &&
		    apic_multi_msi_enable == 0)
			return (0);
		if (apic_multi_msi_enable == 0)
			count = 1;
	}

	if ((rcount = apic_navail_vector(dip, pri)) > count)
		rcount = count;
	else if (rcount == 0 || (rcount < count &&
	    behavior == DDI_INTR_ALLOC_STRICT))
		return (0);

	/* if not ISP2, then round it down */
	if (!ISP2(rcount))
		rcount = 1 << (highbit(rcount) - 1);

	/*
	 * get PCI bus # and devfn from reg spec for device
	 */
	get_busdevfn(dip, &busnum, &devfn);

	/*
	 * Tell xen about this pci device
	 */
	if (!xen_manage_device(busnum, devfn))
		return (0);

	mutex_enter(&airq_mutex);

	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
	for (i = 0; i < rcount; i++) {
		/*
		 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq
		 */
		map_irq.domid = DOMID_SELF;
		map_irq.type = MAP_PIRQ_TYPE_MSI;
		map_irq.index = -rcount; /* hypervisor auto allocates vectors */
		map_irq.pirq = -1;
		map_irq.bus = busnum;
		map_irq.devfn = devfn;
		map_irq.entry_nr = i;
		map_irq.table_base = 0;
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
		irqno = map_irq.pirq;
		if (rc < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
			return (i);
		}
		if (irqno < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_NOTE,
			    "!hypervisor not configured for MSI support");
			xen_support_msi = -1;
			return (0);
		}

		/*
		 * Find out what vector the hypervisor assigned
		 */
		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, i);

		if (msi_allocate_irq(irqno) < 0) {
			mutex_exit(&airq_mutex);
			return (i);
		}
		apic_max_device_irq = max(irqno, apic_max_device_irq);
		apic_min_device_irq = min(irqno, apic_min_device_irq);
		irqptr = apic_irq_table[irqno];
		ASSERT(irqptr != NULL);
#ifdef	DEBUG
		if (apic_vector_to_irq[vector] != APIC_RESV_IRQ)
			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
#endif
		apic_vector_to_irq[vector] = (uchar_t)irqno;
		msi_vector_to_pirq[vector] = (uchar_t)irqno;

		irqptr->airq_vector = vector;
		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
		irqptr->airq_intin_no = (uchar_t)rcount;
		irqptr->airq_ipl = pri;
		irqptr->airq_origirq = (uchar_t)(inum + i);
		irqptr->airq_share_id = 0;
		irqptr->airq_mps_intr_index = MSI_INDEX;
		irqptr->airq_dip = dip;
		irqptr->airq_major = major;
		if (i == 0) /* they all bind to the same cpu */
			cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno);
		else
			irqptr->airq_cpu = cpu;
		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
		    (void *)irqptr->airq_dip, irqptr->airq_vector,
		    irqptr->airq_origirq, pri));
	}
	mutex_exit(&airq_mutex);
	return (rcount);
}
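
/*
 * Example of the rounding above (illustrative): a request for 6 MSIs
 * with 6 vectors free is trimmed to 4, i.e. 1 << (highbit(6) - 1),
 * because PCI MSI only grants power-of-two interrupt counts.
 */
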
/*
 * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
 */
int
apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
    int behavior)
{
	int rcount, i, rc;
	major_t major;
	physdev_map_pirq_t map_irq;
	int busnum, devfn;
	ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);
	uint64_t table_base;
	pfn_t pfnum;

	if (msix_p == NULL) {
		msix_p = pci_msix_init(dip);
		if (msix_p != NULL) {
			i_ddi_set_msix(dip, msix_p);
		} else {
			cmn_err(CE_WARN, "apic_alloc_msix_vectors()"
			    " msix_init failed");
			return (0);
		}
	}
	/*
	 * Hypervisor wants PCI config space address of msix table base
	 */
	pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) &
	    ~PFN_IS_FOREIGN_MFN;
	table_base = (uint64_t)((pfnum << PAGESHIFT) - msix_p->msix_tbl_offset |
	    ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET));
	/*
	 * get PCI bus # and devfn from reg spec for device
	 */
	get_busdevfn(dip, &busnum, &devfn);

	/*
	 * Tell xen about this pci device
	 */
	if (!xen_manage_device(busnum, devfn))
		return (0);
	mutex_enter(&airq_mutex);

	if ((rcount = apic_navail_vector(dip, pri)) > count)
		rcount = count;
	else if (rcount == 0 || (rcount < count &&
	    behavior == DDI_INTR_ALLOC_STRICT)) {
		rcount = 0;
		goto out;
	}

	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
	for (i = 0; i < rcount; i++) {
		int irqno;
		uchar_t vector;
		apic_irq_t *irqptr;

		/*
		 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq
		 */
		map_irq.domid = DOMID_SELF;
		map_irq.type = MAP_PIRQ_TYPE_MSI;
		map_irq.index = -1; /* hypervisor auto allocates vector */
		map_irq.pirq = -1;
		map_irq.bus = busnum;
		map_irq.devfn = devfn;
		map_irq.entry_nr = i;
		map_irq.table_base = table_base;
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
		irqno = map_irq.pirq;
		if (rc < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
			return (i);
		}
		if (irqno < 0) {
			mutex_exit(&airq_mutex);
			cmn_err(CE_NOTE,
			    "!hypervisor not configured for MSI support");
			xen_support_msi = -1;
			return (0);
		}
		/*
		 * Find out what vector the hypervisor assigned
		 */
		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i);

		if (msi_allocate_irq(irqno) < 0) {
			mutex_exit(&airq_mutex);
			return (i);
		}
		apic_vector_to_irq[vector] = (uchar_t)irqno;
		msi_vector_to_pirq[vector] = (uchar_t)irqno;
		apic_max_device_irq = max(irqno, apic_max_device_irq);
		apic_min_device_irq = min(irqno, apic_min_device_irq);
		irqptr = apic_irq_table[irqno];
		ASSERT(irqptr != NULL);
		irqptr->airq_vector = (uchar_t)vector;
		irqptr->airq_ipl = pri;
		irqptr->airq_origirq = (uchar_t)(inum + i);
		irqptr->airq_share_id = 0;
		irqptr->airq_mps_intr_index = MSIX_INDEX;
		irqptr->airq_dip = dip;
		irqptr->airq_major = major;
		irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */
	}
out:
	mutex_exit(&airq_mutex);
	return (rcount);
}
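
/*
 * Note on table_base above (sketch): hat_getpfnum() returns the frame
 * backing the mapped MSI-X table with PFN_IS_FOREIGN_MFN stripped, and
 * the in-page offset of msix_tbl_addr is merged back in, so the
 * hypervisor receives the table's address as seen in PCI config space.
 */
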
/*
 * This finds the apic_irq_t associated with the dip and ispec.
 * The entry should have already been freed, but it cannot have been
 * reused yet, because the hypervisor cannot have reassigned the pirq
 * while we have not yet unmapped it.
 */
static apic_irq_t *
msi_find_irq(dev_info_t *dip, struct intrspec *ispec)
{
	apic_irq_t *irqp;
	int i;

	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
		if ((irqp = apic_irq_table[i]) == NULL)
			continue;
		if ((irqp->airq_dip == dip) &&
		    (irqp->airq_origirq == ispec->intrspec_vec) &&
		    (irqp->airq_ipl == ispec->intrspec_pri)) {
			return (irqp);
		}
	}
	return (NULL);
}

void
apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
{
	int i, rc;
	physdev_unmap_pirq_t unmap_pirq;
	apic_irq_t *irqptr;
	struct intrspec ispec;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
	    "count: %x pri: %x type: %x\n",
	    (void *)dip, inum, count, pri, type));

	/* for MSI/X only */
	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
		return;

	for (i = 0; i < count; i++) {
		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
		    "pri=0x%x count=0x%x\n", inum, pri, count));
		ispec.intrspec_vec = inum + i;
		ispec.intrspec_pri = pri;
		if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) {
			cmn_err(CE_WARN,
			    "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x",
			    ddi_get_name(dip), ddi_get_name_addr(dip),
			    (void *)dip, inum + i, pri);
			continue;
		}
		/*
		 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq
		 */
		unmap_pirq.domid = DOMID_SELF;
		unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector];
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq);
		if (rc < 0) {
			cmn_err(CE_WARN, "unmap pirq failed");
			return;
		}
		irqptr->airq_mps_intr_index = FREE_INDEX;
		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
	}
}

/*
 * The hypervisor doesn't permit access to local apics directly
 */
/* ARGSUSED */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	/*
	 * Return a pointer to a memory area to fake out the
	 * probe code that wants to read apic registers.
	 * The dummy values will end up being ignored by xen
	 * later on when they are used anyway.
	 */
	xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS;
	return (xen_psm_dummy_apic);
}

/* ARGSUSED */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	/*
	 * Return non-null here to fake out configure code that calls this.
	 * The i86xpv platform will not reference through the returned value.
	 */
	return ((uint32_t *)0x1);
}
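
/*
 * Both mapin routines above exist only to satisfy common pcplusmp
 * probe code: reads of the dummy local apic hit xen_psm_dummy_apic,
 * and the fake ioapic pointer is never dereferenced on i86xpv; real
 * ioapic access goes through the PHYSDEVOP_apic_read/write hypercalls
 * below.
 */
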
/* ARGSUSED */
void
mapout_apic(caddr_t addr, size_t len)
{
}

/* ARGSUSED */
void
mapout_ioapic(caddr_t addr, size_t len)
{
}

uint32_t
ioapic_read(int apic_ix, uint32_t reg)
{
	physdev_apic_t apic;

	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
	apic.reg = reg;
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic))
		panic("read ioapic %d reg %d failed", apic_ix, reg);
	return (apic.value);
}

void
ioapic_write(int apic_ix, uint32_t reg, uint32_t value)
{
	physdev_apic_t apic;

	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
	apic.reg = reg;
	apic.value = value;
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
		panic("write ioapic %d reg %d failed", apic_ix, reg);
}

/*
 * This function was added as part of x2APIC support in pcplusmp.
 */
void
ioapic_write_eoi(int apic_ix, uint32_t value)
{
	physdev_apic_t apic;

	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
	apic.reg = APIC_IO_EOI;
	apic.value = value;
	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
		panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix);
}

/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
x2apic_update_psm()
{
}

/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
apic_ret()
{
}

/*
 * Call rebind to do the actual programming.
 */
int
apic_setup_io_intr(void *p, int irq, boolean_t deferred)
{
	apic_irq_t *irqptr;
	struct ioapic_reprogram_data *drep = NULL;
	int rv, cpu;
	cpuset_t cpus;

	if (deferred) {
		drep = (struct ioapic_reprogram_data *)p;
		ASSERT(drep != NULL);
		irqptr = drep->irqp;
	} else {
		irqptr = (apic_irq_t *)p;
	}
	ASSERT(irqptr != NULL);
	/*
	 * Set cpu based on xen idea of online cpu's not apic tables.
	 * Note that xen ignores/sets to its own preferred value the
	 * target cpu field when programming the ioapic anyway.
	 */
	if (irqptr->airq_mps_intr_index == MSI_INDEX)
		cpu = irqptr->airq_cpu;	/* MSI cpus are already set */
	else {
		cpu = xen_psm_bind_intr(irq);
		irqptr->airq_cpu = cpu;
	}
	if (cpu == IRQ_UNBOUND) {
		CPUSET_ZERO(cpus);
		CPUSET_OR(cpus, xen_psm_cpus_online);
	} else {
		CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
	}
	rv = apic_rebind(irqptr, cpu, drep);
	if (rv) {
		/* CPU is not up or interrupt is disabled. Fall back to 0 */
		cpu = 0;
		irqptr->airq_cpu = cpu;
		rv = apic_rebind(irqptr, cpu, drep);
	}
	/*
	 * If rebind successful bind the irq to an event channel
	 */
	if (rv == 0) {
		ec_setup_pirq(irq, irqptr->airq_ipl, &cpus);
		CPUSET_FIND(cpus, cpu);
		apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND;
	}
	return (rv);
}
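
/*
 * Flow summary (illustrative): apic_rebind() programs the ioapic RTE
 * through the hypervisor; if that fails the irq is retargeted at vcpu 0
 * and retried, and on success ec_setup_pirq() binds the pirq to an
 * event channel with the priority and affinity chosen above.
 */
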
/*
 * Allocate a new vector for the given irq
 */
/* ARGSUSED */
uchar_t
apic_modify_vector(uchar_t vector, int irq)
{
	return (apic_allocate_vector(0, irq, 0));
}

/*
 * The rest of the file is just generic psm module boilerplate
 */

static struct psm_ops xen_psm_ops = {
	xen_psm_probe,				/* psm_probe		*/

	xen_psm_softinit,			/* psm_init		*/
	xen_psm_picinit,			/* psm_picinit		*/
	xen_psm_intr_enter,			/* psm_intr_enter	*/
	xen_psm_intr_exit,			/* psm_intr_exit	*/
	xen_psm_setspl,				/* psm_setspl		*/
	xen_psm_addspl,				/* psm_addspl		*/
	xen_psm_delspl,				/* psm_delspl		*/
	xen_psm_disable_intr,			/* psm_disable_intr	*/
	xen_psm_enable_intr,			/* psm_enable_intr	*/
	(int (*)(int))NULL,			/* psm_softlvl_to_irq	*/
	(void (*)(int))NULL,			/* psm_set_softintr	*/
	(void (*)(processorid_t))NULL,		/* psm_set_idlecpu	*/
	(void (*)(processorid_t))NULL,		/* psm_unset_idlecpu	*/

	xen_psm_clkinit,			/* psm_clkinit		*/
	xen_psm_get_clockirq,			/* psm_get_clockirq	*/
	xen_psm_hrtimeinit,			/* psm_hrtimeinit	*/
	xpv_gethrtime,				/* psm_gethrtime	*/

	xen_psm_get_next_processorid,		/* psm_get_next_processorid */
	xen_psm_cpu_start,			/* psm_cpu_start	*/
	xen_psm_post_cpu_start,			/* psm_post_cpu_start	*/
	xen_psm_shutdown,			/* psm_shutdown		*/
	xen_psm_get_ipivect,			/* psm_get_ipivect	*/
	xen_psm_send_ipi,			/* psm_send_ipi		*/

	xen_psm_translate_irq,			/* psm_translate_irq	*/

	(void (*)(int, char *))NULL,		/* psm_notify_error	*/
	(void (*)(int msg))NULL,		/* psm_notify_func	*/
	xen_psm_timer_reprogram,		/* psm_timer_reprogram	*/
	xen_psm_timer_enable,			/* psm_timer_enable	*/
	xen_psm_timer_disable,			/* psm_timer_disable	*/
	(void (*)(void *arg))NULL,		/* psm_post_cyclic_setup */
	(void (*)(int, int))NULL,		/* psm_preshutdown	*/
	xen_intr_ops,			/* Advanced DDI Interrupt framework */
	(int (*)(psm_state_request_t *))NULL,	/* psm_state		*/
	(int (*)(psm_cpu_request_t *))NULL	/* psm_cpu_ops		*/
};

static struct psm_info xen_psm_info = {
	PSM_INFO_VER01_5,	/* version				*/
	PSM_OWN_EXCLUSIVE,	/* ownership				*/
	&xen_psm_ops,		/* operation				*/
	"xVM_psm",		/* machine name				*/
	"platform module"	/* machine descriptions			*/
};

static void *xen_psm_hdlp;

int
_init(void)
{
	return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info));
}

int
_fini(void)
{
	return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info));
}

int
_info(struct modinfo *modinfop)
{
	return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop));
}