1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #define PSMI_1_6 28 29 #include <sys/mutex.h> 30 #include <sys/types.h> 31 #include <sys/time.h> 32 #include <sys/clock.h> 33 #include <sys/machlock.h> 34 #include <sys/smp_impldefs.h> 35 #include <sys/uadmin.h> 36 #include <sys/promif.h> 37 #include <sys/psm.h> 38 #include <sys/psm_common.h> 39 #include <sys/atomic.h> 40 #include <sys/apic.h> 41 #include <sys/archsystm.h> 42 #include <sys/mach_intr.h> 43 #include <sys/hypervisor.h> 44 #include <sys/evtchn_impl.h> 45 #include <sys/modctl.h> 46 #include <sys/trap.h> 47 #include <sys/panic.h> 48 #include <sys/sysmacros.h> 49 #include <sys/pci_intr_lib.h> 50 #include <vm/hat_i86.h> 51 52 #include <xen/public/vcpu.h> 53 #include <xen/public/physdev.h> 54 55 56 /* 57 * Global Data 58 */ 59 60 int xen_psm_verbose = 0; 61 62 /* As of now we don't support x2apic in xVM */ 63 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 64 int apic_error = 0; 65 int apic_verbose = 0; 66 cpuset_t apic_cpumask; 67 int apic_forceload = 0; 68 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 69 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 70 }; 71 uchar_t apic_ipltopri[MAXIPL + 1]; 72 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 73 uint_t apic_picinit_called; 74 apic_cpus_info_t *apic_cpus; 75 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 76 /* use to make sure only one cpu handles the nmi */ 77 static lock_t xen_psm_nmi_lock; 78 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 79 int xen_psm_panic_on_nmi = 0; 80 int xen_psm_num_nmis = 0; 81 82 cpuset_t xen_psm_cpus_online; /* online cpus */ 83 int xen_psm_ncpus = 1; /* cpu count */ 84 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 85 86 int xen_support_msi = -1; 87 88 static int xen_clock_irq = INVALID_IRQ; 89 90 /* flag definitions for xen_psm_verbose */ 91 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 92 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 93 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 94 95 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 96 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 97 cmn_err fmt; 98 99 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 100 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 101 prom_printf fmt; 102 103 /* 104 * Dummy apic array to point common routines at that want to do some apic 105 * manipulation. Xen doesn't allow guest apic access so we point at these 106 * memory locations to fake out those who want to do apic fiddling. 107 */ 108 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 109 110 static struct psm_info xen_psm_info; 111 static void xen_psm_setspl(int); 112 113 int 114 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 115 int behavior); 116 int 117 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 118 int behavior); 119 120 /* 121 * Local support routines 122 */ 123 124 /* 125 * Select vcpu to bind xen virtual device interrupt to. 126 */ 127 /*ARGSUSED*/ 128 int 129 xen_psm_bind_intr(int irq) 130 { 131 int bind_cpu; 132 apic_irq_t *irqptr; 133 134 bind_cpu = IRQ_UNBOUND; 135 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 136 return (bind_cpu); 137 if (irq <= APIC_MAX_VECTOR) 138 irqptr = apic_irq_table[irq]; 139 else 140 irqptr = NULL; 141 if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND)) 142 bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND; 143 if (bind_cpu != IRQ_UNBOUND) { 144 if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)) 145 bind_cpu = 0; 146 goto done; 147 } 148 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 149 do { 150 bind_cpu = xen_psm_next_bind_cpu++; 151 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 152 xen_psm_next_bind_cpu = 0; 153 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 154 } else { 155 bind_cpu = 0; 156 } 157 done: 158 return (bind_cpu); 159 } 160 161 /* 162 * Autoconfiguration Routines 163 */ 164 165 static int 166 xen_psm_probe(void) 167 { 168 int ret = PSM_SUCCESS; 169 170 if (DOMAIN_IS_INITDOMAIN(xen_info)) 171 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 172 return (ret); 173 } 174 175 static void 176 xen_psm_softinit(void) 177 { 178 /* LINTED logical expression always true: op "||" */ 179 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 180 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 181 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 182 apic_init_common(); 183 } 184 } 185 186 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 187 188 /*ARGSUSED*/ 189 static int 190 xen_psm_clkinit(int hertz) 191 { 192 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 193 extern int dosynctodr; 194 195 /* 196 * domU cannot set the TOD hardware, fault the TOD clock now to 197 * indicate that and turn off attempts to sync TOD hardware 198 * with the hires timer. 199 */ 200 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 201 mutex_enter(&tod_lock); 202 (void) tod_fault(TOD_RDONLY, 0); 203 dosynctodr = 0; 204 mutex_exit(&tod_lock); 205 } 206 /* 207 * The hypervisor provides a timer based on the local APIC timer. 208 * The interface supports requests of nanosecond resolution. 209 * A common frequency of the apic clock is 100 Mhz which 210 * gives a resolution of 10 nsec per tick. What we would really like 211 * is a way to get the ns per tick value from xen. 212 * XXPV - This is an assumption that needs checking and may change 213 */ 214 return (XEN_NSEC_PER_TICK); 215 } 216 217 static void 218 xen_psm_hrtimeinit(void) 219 { 220 extern int gethrtime_hires; 221 gethrtime_hires = 1; 222 } 223 224 /* xen_psm NMI handler */ 225 /*ARGSUSED*/ 226 static void 227 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 228 { 229 xen_psm_num_nmis++; 230 231 if (!lock_try(&xen_psm_nmi_lock)) 232 return; 233 234 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 235 debug_enter("NMI received: entering kmdb\n"); 236 } else if (xen_psm_panic_on_nmi) { 237 /* Keep panic from entering kmdb. */ 238 nopanicdebug = 1; 239 panic("NMI received\n"); 240 } else { 241 /* 242 * prom_printf is the best shot we have of something which is 243 * problem free from high level/NMI type of interrupts 244 */ 245 prom_printf("NMI received\n"); 246 } 247 248 lock_clear(&xen_psm_nmi_lock); 249 } 250 251 static void 252 xen_psm_picinit() 253 { 254 int cpu, irqno; 255 cpuset_t cpus; 256 257 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 258 /* set a flag so we know we have run xen_psm_picinit() */ 259 apic_picinit_called = 1; 260 LOCK_INIT_CLEAR(&apic_ioapic_lock); 261 262 /* XXPV - do we need to do this? */ 263 picsetup(); /* initialise the 8259 */ 264 265 /* enable apic mode if imcr present */ 266 /* XXPV - do we need to do this either? */ 267 if (apic_imcrp) { 268 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 269 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 270 } 271 272 ioapic_init_intr(IOAPIC_NOMASK); 273 /* 274 * We never called xen_psm_addspl() when the SCI 275 * interrupt was added because that happened before the 276 * PSM module was loaded. Fix that up here by doing 277 * any missed operations (e.g. bind to CPU) 278 */ 279 if ((irqno = apic_sci_vect) > 0) { 280 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 281 CPUSET_ZERO(cpus); 282 CPUSET_OR(cpus, xen_psm_cpus_online); 283 } else { 284 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 285 } 286 ec_set_irq_affinity(irqno, cpus); 287 apic_irq_table[irqno]->airq_temp_cpu = 288 (uchar_t)(cpu & ~IRQ_USER_BOUND); 289 ec_enable_irq(irqno); 290 } 291 } 292 293 /* add nmi handler - least priority nmi handler */ 294 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 295 296 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 297 "xVM_psm NMI handler", (caddr_t)NULL)) 298 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 299 } 300 301 302 /* 303 * generates an interprocessor interrupt to another CPU 304 */ 305 static void 306 xen_psm_send_ipi(int cpun, int ipl) 307 { 308 ulong_t flag = intr_clear(); 309 310 ec_send_ipi(ipl, cpun); 311 intr_restore(flag); 312 } 313 314 /*ARGSUSED*/ 315 static int 316 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 317 { 318 int cpu, ret; 319 cpuset_t cpus; 320 321 /* 322 * We are called at splhi() so we can't call anything that might end 323 * up trying to context switch. 324 */ 325 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 326 DOMAIN_IS_INITDOMAIN(xen_info)) { 327 /* 328 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 329 */ 330 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 331 } else { 332 /* 333 * Set priority/affinity/enable for non PIRQs 334 */ 335 ret = ec_set_irq_priority(irqno, ipl); 336 ASSERT(ret == 0); 337 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 338 CPUSET_ZERO(cpus); 339 CPUSET_OR(cpus, xen_psm_cpus_online); 340 } else { 341 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 342 } 343 ec_set_irq_affinity(irqno, cpus); 344 ec_enable_irq(irqno); 345 } 346 return (ret); 347 } 348 349 /* 350 * Acquire ownership of this irq on this cpu 351 */ 352 void 353 xen_psm_acquire_irq(int irq) 354 { 355 ulong_t flags; 356 int cpuid; 357 358 /* 359 * If the irq is currently being serviced by another cpu 360 * we busy-wait for the other cpu to finish. Take any 361 * pending interrupts before retrying. 362 */ 363 do { 364 flags = intr_clear(); 365 cpuid = ec_block_irq(irq); 366 intr_restore(flags); 367 } while (cpuid != CPU->cpu_id); 368 } 369 370 /*ARGSUSED*/ 371 static int 372 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 373 { 374 apic_irq_t *irqptr; 375 int err = PSM_SUCCESS; 376 377 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 378 DOMAIN_IS_INITDOMAIN(xen_info)) { 379 irqptr = apic_irq_table[irqno]; 380 /* 381 * unbind if no more sharers of this irq/evtchn 382 */ 383 if (irqptr->airq_share == 1) { 384 xen_psm_acquire_irq(irqno); 385 ec_unbind_irq(irqno); 386 } 387 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 388 /* 389 * If still in use reset priority 390 */ 391 if (!err && irqptr->airq_share != 0) { 392 err = ec_set_irq_priority(irqno, max_ipl); 393 return (err); 394 } 395 } else { 396 xen_psm_acquire_irq(irqno); 397 ec_unbind_irq(irqno); 398 } 399 return (err); 400 } 401 402 static processorid_t 403 xen_psm_get_next_processorid(processorid_t id) 404 { 405 if (id == -1) 406 return (0); 407 408 for (id++; id < NCPU; id++) { 409 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 410 case 0: /* yeah, that one's there */ 411 return (id); 412 default: 413 case X_EINVAL: /* out of range */ 414 return (-1); 415 case X_ENOENT: /* not present in the domain */ 416 /* 417 * It's not clear that we -need- to keep looking 418 * at this point, if, e.g., we can guarantee 419 * the hypervisor always keeps a contiguous range 420 * of vcpus around this is equivalent to "out of range". 421 * 422 * But it would be sad to miss a vcpu we're 423 * supposed to be using .. 424 */ 425 break; 426 } 427 } 428 429 return (-1); 430 } 431 432 /* 433 * XXPV - undo the start cpu op change; return to ignoring this value 434 * - also tweak error handling in main startup loop 435 */ 436 /*ARGSUSED*/ 437 static int 438 xen_psm_cpu_start(processorid_t id, caddr_t arg) 439 { 440 int ret; 441 442 ASSERT(id > 0); 443 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 444 ec_bind_cpu_ipis(id); 445 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 446 if ((ret = xen_vcpu_up(id)) == 0) 447 xen_psm_ncpus++; 448 else 449 ret = EINVAL; 450 return (ret); 451 } 452 453 /* 454 * Allocate an irq for inter cpu signaling 455 */ 456 /*ARGSUSED*/ 457 static int 458 xen_psm_get_ipivect(int ipl, int type) 459 { 460 return (ec_bind_ipi_to_irq(ipl, 0)); 461 } 462 463 /*ARGSUSED*/ 464 static int 465 xen_psm_get_clockirq(int ipl) 466 { 467 if (xen_clock_irq != INVALID_IRQ) 468 return (xen_clock_irq); 469 470 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 471 return (xen_clock_irq); 472 } 473 474 /*ARGSUSED*/ 475 static void 476 xen_psm_shutdown(int cmd, int fcn) 477 { 478 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 479 480 switch (cmd) { 481 case A_SHUTDOWN: 482 switch (fcn) { 483 case AD_BOOT: 484 case AD_IBOOT: 485 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 486 break; 487 case AD_POWEROFF: 488 /* fall through if domU or if poweroff fails */ 489 if (DOMAIN_IS_INITDOMAIN(xen_info)) 490 if (apic_enable_acpi) 491 (void) acpi_poweroff(); 492 /* FALLTHRU */ 493 case AD_HALT: 494 default: 495 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 496 break; 497 } 498 break; 499 case A_REBOOT: 500 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 501 break; 502 default: 503 return; 504 } 505 } 506 507 508 static int 509 xen_psm_translate_irq(dev_info_t *dip, int irqno) 510 { 511 if (dip == NULL) { 512 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 513 " dip = NULL\n", irqno)); 514 return (irqno); 515 } 516 return (irqno); 517 } 518 519 /* 520 * xen_psm_intr_enter() acks the event that triggered the interrupt and 521 * returns the new priority level, 522 */ 523 /*ARGSUSED*/ 524 static int 525 xen_psm_intr_enter(int ipl, int *vector) 526 { 527 int newipl; 528 uint_t intno; 529 cpu_t *cpu = CPU; 530 531 intno = (*vector); 532 533 ASSERT(intno < NR_IRQS); 534 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 535 536 if (!ec_is_edge_pirq(intno)) 537 ec_clear_irq(intno); 538 539 newipl = autovect[intno].avh_hi_pri; 540 if (newipl == 0) { 541 /* 542 * (newipl == 0) means we have no service routines for this 543 * vector. We will treat this as a spurious interrupt. 544 * We have cleared the pending bit already, clear the event 545 * mask and return a spurious interrupt. This case can happen 546 * when an interrupt delivery is racing with the removal of 547 * of the service routine for that interrupt. 548 */ 549 ec_unmask_irq(intno); 550 newipl = -1; /* flag spurious interrupt */ 551 } else if (newipl <= cpu->cpu_pri) { 552 /* 553 * (newipl <= cpu->cpu_pri) means that we must be trying to 554 * service a vector that was shared with a higher priority 555 * isr. The higher priority handler has been removed and 556 * we need to service this int. We can't return a lower 557 * priority than current cpu priority. Just synthesize a 558 * priority to return that should be acceptable. 559 */ 560 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 561 } 562 return (newipl); 563 } 564 565 566 /* 567 * xen_psm_intr_exit() restores the old interrupt 568 * priority level after processing an interrupt. 569 * It is called with interrupts disabled, and does not enable interrupts. 570 */ 571 /* ARGSUSED */ 572 static void 573 xen_psm_intr_exit(int ipl, int vector) 574 { 575 ec_try_unmask_irq(vector); 576 xen_psm_setspl(ipl); 577 } 578 579 intr_exit_fn_t 580 psm_intr_exit_fn(void) 581 { 582 return (xen_psm_intr_exit); 583 } 584 585 /* 586 * Check if new ipl level allows delivery of previously unserviced events 587 */ 588 static void 589 xen_psm_setspl(int ipl) 590 { 591 struct cpu *cpu = CPU; 592 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 593 uint16_t pending; 594 595 ASSERT(vci->evtchn_upcall_mask != 0); 596 597 /* 598 * If new ipl level will enable any pending interrupts, setup so the 599 * upcoming sti will cause us to get an upcall. 600 */ 601 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 602 if (pending) { 603 int i; 604 ulong_t pending_sels = 0; 605 volatile ulong_t *selp; 606 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 607 608 for (i = bsrw_insn(pending); i > ipl; i--) 609 pending_sels |= cpe->pending_sel[i]; 610 ASSERT(pending_sels); 611 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 612 atomic_or_ulong(selp, pending_sels); 613 vci->evtchn_upcall_pending = 1; 614 } 615 } 616 617 /* 618 * This function provides external interface to the nexus for all 619 * functionality related to the new DDI interrupt framework. 620 * 621 * Input: 622 * dip - pointer to the dev_info structure of the requested device 623 * hdlp - pointer to the internal interrupt handle structure for the 624 * requested interrupt 625 * intr_op - opcode for this call 626 * result - pointer to the integer that will hold the result to be 627 * passed back if return value is PSM_SUCCESS 628 * 629 * Output: 630 * return value is either PSM_SUCCESS or PSM_FAILURE 631 */ 632 int 633 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 634 psm_intr_op_t intr_op, int *result) 635 { 636 int cap; 637 int err; 638 int new_priority; 639 apic_irq_t *irqp; 640 struct intrspec *ispec; 641 642 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 643 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 644 645 switch (intr_op) { 646 case PSM_INTR_OP_CHECK_MSI: 647 /* 648 * Till PCI passthru is supported, only dom0 has MSI/MSIX 649 */ 650 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 651 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 652 DDI_INTR_TYPE_MSIX); 653 break; 654 } 655 /* 656 * Check MSI/X is supported or not at APIC level and 657 * masked off the MSI/X bits in hdlp->ih_type if not 658 * supported before return. If MSI/X is supported, 659 * leave the ih_type unchanged and return. 660 * 661 * hdlp->ih_type passed in from the nexus has all the 662 * interrupt types supported by the device. 663 */ 664 if (xen_support_msi == 0) { 665 /* 666 * if xen_support_msi is not set, call 667 * apic_check_msi_support() to check whether msi 668 * is supported first 669 */ 670 if (apic_check_msi_support() == PSM_SUCCESS) 671 xen_support_msi = 1; 672 else 673 xen_support_msi = -1; 674 } 675 if (xen_support_msi == 1) 676 *result = hdlp->ih_type; 677 else 678 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 679 DDI_INTR_TYPE_MSIX); 680 break; 681 case PSM_INTR_OP_ALLOC_VECTORS: 682 if (hdlp->ih_type == DDI_INTR_TYPE_MSI) 683 *result = apic_alloc_msi_vectors(dip, hdlp->ih_inum, 684 hdlp->ih_scratch1, hdlp->ih_pri, 685 (int)(uintptr_t)hdlp->ih_scratch2); 686 else 687 *result = apic_alloc_msix_vectors(dip, hdlp->ih_inum, 688 hdlp->ih_scratch1, hdlp->ih_pri, 689 (int)(uintptr_t)hdlp->ih_scratch2); 690 break; 691 case PSM_INTR_OP_FREE_VECTORS: 692 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 693 hdlp->ih_pri, hdlp->ih_type); 694 break; 695 case PSM_INTR_OP_NAVAIL_VECTORS: 696 /* 697 * XXPV - maybe we should make this be: 698 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 699 */ 700 if (DOMAIN_IS_INITDOMAIN(xen_info)) 701 *result = APIC_VECTOR_PER_IPL; 702 else 703 *result = 1; 704 break; 705 case PSM_INTR_OP_XLATE_VECTOR: 706 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 707 if (ispec->intrspec_vec >= PIRQ_BASE && 708 ispec->intrspec_vec < NR_PIRQS && 709 DOMAIN_IS_INITDOMAIN(xen_info)) { 710 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 711 } else { 712 *result = ispec->intrspec_vec; 713 } 714 break; 715 case PSM_INTR_OP_GET_PENDING: 716 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 717 *result = ec_pending_irq(hdlp->ih_vector); 718 break; 719 case PSM_INTR_OP_CLEAR_MASK: 720 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 721 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 722 return (PSM_FAILURE); 723 ec_enable_irq(hdlp->ih_vector); 724 break; 725 case PSM_INTR_OP_SET_MASK: 726 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 727 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 728 return (PSM_FAILURE); 729 ec_disable_irq(hdlp->ih_vector); 730 break; 731 case PSM_INTR_OP_GET_CAP: 732 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 733 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 734 cap |= DDI_INTR_FLAG_MASKABLE; 735 *result = cap; 736 break; 737 case PSM_INTR_OP_GET_SHARED: 738 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 739 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 740 return (PSM_FAILURE); 741 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 742 == NULL) 743 return (PSM_FAILURE); 744 *result = irqp->airq_share ? 1: 0; 745 } else { 746 return (PSM_FAILURE); 747 } 748 break; 749 case PSM_INTR_OP_SET_PRI: 750 new_priority = *(int *)result; 751 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 752 if (err != 0) 753 return (PSM_FAILURE); 754 break; 755 case PSM_INTR_OP_GET_INTR: 756 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 757 return (PSM_FAILURE); 758 /* 759 * The interrupt handle given here has been allocated 760 * specifically for this command, and ih_private carries 761 * a pointer to a apic_get_intr_t. 762 */ 763 if (apic_get_vector_intr_info( 764 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 765 return (PSM_FAILURE); 766 break; 767 case PSM_INTR_OP_SET_CAP: 768 /* FALLTHRU */ 769 default: 770 return (PSM_FAILURE); 771 } 772 return (PSM_SUCCESS); 773 } 774 775 static void 776 xen_psm_rebind_irq(int irq) 777 { 778 cpuset_t ncpu; 779 processorid_t newcpu; 780 apic_irq_t *irqptr; 781 782 newcpu = xen_psm_bind_intr(irq); 783 if (newcpu == IRQ_UNBOUND) { 784 CPUSET_ZERO(ncpu); 785 CPUSET_OR(ncpu, xen_psm_cpus_online); 786 } else { 787 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 788 } 789 ec_set_irq_affinity(irq, ncpu); 790 if (irq <= APIC_MAX_VECTOR) { 791 irqptr = apic_irq_table[irq]; 792 ASSERT(irqptr != NULL); 793 irqptr->airq_temp_cpu = (uchar_t)newcpu; 794 } 795 } 796 797 /* 798 * Disable all device interrupts for the given cpu. 799 * High priority interrupts are not disabled and will still be serviced. 800 */ 801 static int 802 xen_psm_disable_intr(processorid_t cpun) 803 { 804 int irq; 805 806 /* 807 * Can't offline VCPU 0 on this hypervisor. There's no reason 808 * anyone would want to given that the CPUs are virtual. Also note 809 * that the hypervisor requires suspend/resume to be on VCPU 0. 810 */ 811 if (cpun == 0) 812 return (PSM_FAILURE); 813 814 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 815 for (irq = 0; irq < NR_IRQS; irq++) { 816 if (!ec_irq_needs_rebind(irq, cpun)) 817 continue; 818 xen_psm_rebind_irq(irq); 819 } 820 return (PSM_SUCCESS); 821 } 822 823 static void 824 xen_psm_enable_intr(processorid_t cpun) 825 { 826 int irq; 827 828 if (cpun == 0) 829 return; 830 831 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 832 833 /* 834 * Rebalance device interrupts among online processors 835 */ 836 for (irq = 0; irq < NR_IRQS; irq++) { 837 if (!ec_irq_rebindable(irq)) 838 continue; 839 xen_psm_rebind_irq(irq); 840 } 841 842 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 843 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 844 } 845 } 846 847 static int 848 xen_psm_post_cpu_start() 849 { 850 processorid_t cpun; 851 852 cpun = psm_get_cpu_id(); 853 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 854 /* 855 * Non-virtualized environments can call psm_post_cpu_start 856 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. 857 * xen_psm_post_cpu_start() is only called from boot. 858 */ 859 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; 860 } 861 return (PSM_SUCCESS); 862 } 863 864 /* 865 * This function will reprogram the timer. 866 * 867 * When in oneshot mode the argument is the absolute time in future at which to 868 * generate the interrupt. 869 * 870 * When in periodic mode, the argument is the interval at which the 871 * interrupts should be generated. There is no need to support the periodic 872 * mode timer change at this time. 873 * 874 * Note that we must be careful to convert from hrtime to Xen system time (see 875 * xpv_timestamp.c). 876 */ 877 static void 878 xen_psm_timer_reprogram(hrtime_t timer_req) 879 { 880 hrtime_t now, timer_new, time_delta, xen_time; 881 ulong_t flags; 882 883 flags = intr_clear(); 884 /* 885 * We should be called from high PIL context (CBE_HIGH_PIL), 886 * so kpreempt is disabled. 887 */ 888 889 now = xpv_gethrtime(); 890 xen_time = xpv_getsystime(); 891 if (timer_req <= now) { 892 /* 893 * requested to generate an interrupt in the past 894 * generate an interrupt as soon as possible 895 */ 896 time_delta = XEN_NSEC_PER_TICK; 897 } else 898 time_delta = timer_req - now; 899 900 timer_new = xen_time + time_delta; 901 if (HYPERVISOR_set_timer_op(timer_new) != 0) 902 panic("can't set hypervisor timer?"); 903 intr_restore(flags); 904 } 905 906 /* 907 * This function will enable timer interrupts. 908 */ 909 static void 910 xen_psm_timer_enable(void) 911 { 912 ec_unmask_irq(xen_clock_irq); 913 } 914 915 /* 916 * This function will disable timer interrupts on the current cpu. 917 */ 918 static void 919 xen_psm_timer_disable(void) 920 { 921 (void) ec_block_irq(xen_clock_irq); 922 /* 923 * If the clock irq is pending on this cpu then we need to 924 * clear the pending interrupt. 925 */ 926 ec_unpend_irq(xen_clock_irq); 927 } 928 929 /* 930 * 931 * The following functions are in the platform specific file so that they 932 * can be different functions depending on whether we are running on 933 * bare metal or a hypervisor. 934 */ 935 936 /* 937 * Allocate a free vector for irq at ipl. 938 */ 939 /* ARGSUSED */ 940 uchar_t 941 apic_allocate_vector(int ipl, int irq, int pri) 942 { 943 physdev_irq_t irq_op; 944 uchar_t vector; 945 int rc; 946 947 irq_op.irq = irq; 948 949 if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 950 != 0) 951 panic("Hypervisor alloc vector failed err: %d", -rc); 952 vector = irq_op.vector; 953 /* 954 * No need to worry about vector colliding with our reserved vectors 955 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 956 * generated traps and handle them properly. 957 */ 958 apic_vector_to_irq[vector] = (uchar_t)irq; 959 return (vector); 960 } 961 962 /* Mark vector as not being used by any irq */ 963 void 964 apic_free_vector(uchar_t vector) 965 { 966 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 967 } 968 969 /* 970 * This function returns the no. of vectors available for the pri. 971 * dip is not used at this moment. If we really don't need that, 972 * it will be removed. Since priority is not limited by hardware 973 * when running on the hypervisor we simply return the maximum no. 974 * of available contiguous vectors. 975 */ 976 /*ARGSUSED*/ 977 int 978 apic_navail_vector(dev_info_t *dip, int pri) 979 { 980 int lowest, highest, i, navail, count; 981 982 DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n", 983 (void *)dip, pri)); 984 985 highest = APIC_MAX_VECTOR; 986 lowest = APIC_BASE_VECT; 987 navail = count = 0; 988 989 /* It has to be contiguous */ 990 for (i = lowest; i < highest; i++) { 991 count = 0; 992 while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) && 993 (i < highest)) { 994 count++; 995 i++; 996 } 997 if (count > navail) 998 navail = count; 999 } 1000 return (navail); 1001 } 1002 1003 static physdev_manage_pci_t *managed_devlist; 1004 static int mdev_cnt; 1005 static int mdev_size = 128; 1006 static uchar_t msi_vector_to_pirq[APIC_MAX_VECTOR+1]; 1007 1008 /* 1009 * Add devfn on given bus to devices managed by hypervisor 1010 */ 1011 static int 1012 xen_manage_device(uint8_t bus, uint8_t devfn) 1013 { 1014 physdev_manage_pci_t manage_pci, *newlist; 1015 int rc, i, oldsize; 1016 1017 /* 1018 * Check if bus/devfn already managed. If so just return success. 1019 */ 1020 if (managed_devlist == NULL) { 1021 managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) * 1022 mdev_size, KM_NOSLEEP); 1023 if (managed_devlist == NULL) { 1024 cmn_err(CE_WARN, 1025 "Can't alloc space for managed device list"); 1026 return (0); 1027 } 1028 }; 1029 for (i = 0; i < mdev_cnt; i++) { 1030 if (managed_devlist[i].bus == bus && 1031 managed_devlist[i].devfn == devfn) 1032 return (1); /* device already managed */ 1033 } 1034 manage_pci.bus = bus; 1035 manage_pci.devfn = devfn; 1036 rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); 1037 if (rc < 0) { 1038 cmn_err(CE_WARN, 1039 "hypervisor add pci device call failed bus:0x%x" 1040 " devfn:0x%x", bus, devfn); 1041 return (0); 1042 } 1043 /* 1044 * Add device to the managed device list 1045 */ 1046 if (i == mdev_size) { 1047 /* 1048 * grow the managed device list 1049 */ 1050 oldsize = mdev_size * sizeof (physdev_manage_pci_t); 1051 mdev_size *= 2; 1052 newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size, 1053 KM_NOSLEEP); 1054 if (newlist == NULL) { 1055 cmn_err(CE_WARN, "Can't grow managed device list"); 1056 return (0); 1057 } 1058 bcopy(managed_devlist, newlist, oldsize); 1059 kmem_free(managed_devlist, oldsize); 1060 managed_devlist = newlist; 1061 } 1062 managed_devlist[i].bus = bus; 1063 managed_devlist[i].devfn = devfn; 1064 mdev_cnt++; 1065 return (1); 1066 } 1067 1068 /* 1069 * allocate an apic irq struct for an MSI interrupt 1070 */ 1071 static int 1072 msi_allocate_irq(int irq) 1073 { 1074 apic_irq_t *irqptr = apic_irq_table[irq]; 1075 1076 if (irqptr == NULL) { 1077 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1078 if (irqptr == NULL) { 1079 cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ"); 1080 return (-1); 1081 } 1082 apic_irq_table[irq] = irqptr; 1083 } else { 1084 if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0) 1085 irqptr->airq_mps_intr_index = FREE_INDEX; 1086 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 1087 cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use"); 1088 return (-1); 1089 } 1090 } 1091 irqptr->airq_mps_intr_index = FREE_INDEX; 1092 return (irq); 1093 } 1094 1095 /* 1096 * read MSI/MSIX vector out of config space 1097 */ 1098 static uchar_t 1099 xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry) 1100 { 1101 uint64_t msi_data = 0; 1102 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip); 1103 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip); 1104 ushort_t msi_ctrl; 1105 uchar_t vector; 1106 1107 ASSERT((handle != NULL) && (cap_ptr != 0)); 1108 if (type == DDI_INTR_TYPE_MSI) { 1109 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1110 /* 1111 * Get vector 1112 */ 1113 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1114 msi_data = pci_config_get16(handle, 1115 cap_ptr + PCI_MSI_64BIT_DATA); 1116 } else { 1117 msi_data = pci_config_get16(handle, 1118 cap_ptr + PCI_MSI_32BIT_DATA); 1119 } 1120 } else if (type == DDI_INTR_TYPE_MSIX) { 1121 uintptr_t off; 1122 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1123 1124 /* Offset into the given entry in the MSI-X table */ 1125 off = (uintptr_t)msix_p->msix_tbl_addr + 1126 (entry * PCI_MSIX_VECTOR_SIZE); 1127 1128 msi_data = ddi_get32(msix_p->msix_tbl_hdl, 1129 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET)); 1130 } 1131 vector = msi_data & 0xff; 1132 return (vector); 1133 } 1134 1135 1136 static void 1137 get_busdevfn(dev_info_t *dip, int *busp, int *devfnp) 1138 { 1139 pci_regspec_t *regspec; 1140 int reglen; 1141 1142 /* 1143 * Get device reg spec, first word has PCI bus and 1144 * device/function info we need. 1145 */ 1146 if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg", 1147 (caddr_t)®spec, ®len) != DDI_SUCCESS) { 1148 cmn_err(CE_WARN, 1149 "get_busdevfn() failed to get regspec."); 1150 return; 1151 } 1152 /* 1153 * get PCI bus # from reg spec for device 1154 */ 1155 *busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi); 1156 /* 1157 * get combined device/function from reg spec for device. 1158 */ 1159 *devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >> 1160 PCI_REG_FUNC_SHIFT; 1161 1162 kmem_free(regspec, reglen); 1163 } 1164 1165 /* 1166 * This function allocates "count" MSI vector(s) for the given "dip/pri/type" 1167 */ 1168 int 1169 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 1170 int behavior) 1171 { 1172 int rcount, i, rc, irqno; 1173 uchar_t vector, cpu; 1174 major_t major; 1175 apic_irq_t *irqptr; 1176 physdev_map_pirq_t map_irq; 1177 int busnum, devfn; 1178 1179 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p " 1180 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 1181 (void *)dip, inum, pri, count, behavior)); 1182 1183 if (count > 1) { 1184 if (behavior == DDI_INTR_ALLOC_STRICT && 1185 apic_multi_msi_enable == 0) 1186 return (0); 1187 if (apic_multi_msi_enable == 0) 1188 count = 1; 1189 } 1190 1191 if ((rcount = apic_navail_vector(dip, pri)) > count) 1192 rcount = count; 1193 else if (rcount == 0 || (rcount < count && 1194 behavior == DDI_INTR_ALLOC_STRICT)) 1195 return (0); 1196 1197 /* if not ISP2, then round it down */ 1198 if (!ISP2(rcount)) 1199 rcount = 1 << (highbit(rcount) - 1); 1200 1201 /* 1202 * get PCI bus # and devfn from reg spec for device 1203 */ 1204 get_busdevfn(dip, &busnum, &devfn); 1205 1206 /* 1207 * Tell xen about this pci device 1208 */ 1209 if (!xen_manage_device(busnum, devfn)) 1210 return (0); 1211 1212 mutex_enter(&airq_mutex); 1213 1214 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1215 for (i = 0; i < rcount; i++) { 1216 /* 1217 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq 1218 */ 1219 map_irq.domid = DOMID_SELF; 1220 map_irq.type = MAP_PIRQ_TYPE_MSI; 1221 map_irq.index = -1; /* hypervisor auto allocates vector */ 1222 map_irq.pirq = -1; 1223 map_irq.bus = busnum; 1224 map_irq.devfn = devfn; 1225 map_irq.entry_nr = 0; 1226 map_irq.table_base = 0; 1227 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1228 irqno = map_irq.pirq; 1229 if (rc < 0) { 1230 mutex_exit(&airq_mutex); 1231 cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1232 return (0); 1233 } 1234 if (irqno < 0) { 1235 mutex_exit(&airq_mutex); 1236 cmn_err(CE_NOTE, 1237 "!hypervisor not configured for MSI support"); 1238 xen_support_msi = -1; 1239 return (0); 1240 } 1241 if (msi_allocate_irq(irqno) < 0) { 1242 mutex_exit(&airq_mutex); 1243 return (0); 1244 } 1245 /* 1246 * Find out what vector the hypervisor assigned 1247 */ 1248 vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, 0); 1249 apic_max_device_irq = max(irqno, apic_max_device_irq); 1250 apic_min_device_irq = min(irqno, apic_min_device_irq); 1251 irqptr = apic_irq_table[irqno]; 1252 ASSERT(irqptr != NULL); 1253 #ifdef DEBUG 1254 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1255 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: " 1256 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1257 #endif 1258 apic_vector_to_irq[vector] = (uchar_t)irqno; 1259 msi_vector_to_pirq[vector] = (uchar_t)irqno; 1260 1261 irqptr->airq_vector = vector; 1262 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1263 irqptr->airq_intin_no = (uchar_t)rcount; 1264 irqptr->airq_ipl = pri; 1265 irqptr->airq_origirq = (uchar_t)(inum + i); 1266 irqptr->airq_share_id = 0; 1267 irqptr->airq_mps_intr_index = MSI_INDEX; 1268 irqptr->airq_dip = dip; 1269 irqptr->airq_major = major; 1270 if (i == 0) /* they all bind to the same cpu */ 1271 cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno); 1272 else 1273 irqptr->airq_cpu = cpu; 1274 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x " 1275 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1276 (void *)irqptr->airq_dip, irqptr->airq_vector, 1277 irqptr->airq_origirq, pri)); 1278 } 1279 mutex_exit(&airq_mutex); 1280 return (rcount); 1281 } 1282 1283 /* 1284 * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type" 1285 */ 1286 int 1287 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 1288 int behavior) 1289 { 1290 int rcount, i, rc; 1291 major_t major; 1292 physdev_map_pirq_t map_irq; 1293 int busnum, devfn; 1294 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1295 uint64_t table_base; 1296 pfn_t pfnum; 1297 1298 if (msix_p == NULL) { 1299 msix_p = pci_msix_init(dip); 1300 if (msix_p != NULL) { 1301 i_ddi_set_msix(dip, msix_p); 1302 } else { 1303 cmn_err(CE_WARN, "apic_alloc_msix_vectors()" 1304 " msix_init failed"); 1305 return (0); 1306 } 1307 } 1308 /* 1309 * Hypervisor wants PCI config space address of msix table 1310 */ 1311 pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) & 1312 ~PFN_IS_FOREIGN_MFN; 1313 table_base = (uint64_t)((pfnum << PAGESHIFT) | 1314 ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET)); 1315 /* 1316 * get PCI bus # and devfn from reg spec for device 1317 */ 1318 get_busdevfn(dip, &busnum, &devfn); 1319 1320 /* 1321 * Tell xen about this pci device 1322 */ 1323 if (!xen_manage_device(busnum, devfn)) 1324 return (0); 1325 mutex_enter(&airq_mutex); 1326 1327 if ((rcount = apic_navail_vector(dip, pri)) > count) 1328 rcount = count; 1329 else if (rcount == 0 || (rcount < count && 1330 behavior == DDI_INTR_ALLOC_STRICT)) { 1331 rcount = 0; 1332 goto out; 1333 } 1334 1335 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1336 for (i = 0; i < rcount; i++) { 1337 int irqno; 1338 uchar_t vector; 1339 apic_irq_t *irqptr; 1340 1341 /* 1342 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq 1343 */ 1344 map_irq.domid = DOMID_SELF; 1345 map_irq.type = MAP_PIRQ_TYPE_MSI; 1346 map_irq.index = -1; /* hypervisor auto allocates vector */ 1347 map_irq.pirq = -1; 1348 map_irq.bus = busnum; 1349 map_irq.devfn = devfn; 1350 map_irq.entry_nr = i; 1351 map_irq.table_base = table_base; 1352 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1353 irqno = map_irq.pirq; 1354 if (rc < 0) { 1355 mutex_exit(&airq_mutex); 1356 cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1357 return (0); 1358 } 1359 if (irqno < 0) { 1360 mutex_exit(&airq_mutex); 1361 cmn_err(CE_NOTE, 1362 "!hypervisor not configured for MSI support"); 1363 xen_support_msi = -1; 1364 return (0); 1365 } 1366 /* 1367 * Find out what vector the hypervisor assigned 1368 */ 1369 vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i); 1370 if (msi_allocate_irq(irqno) < 0) { 1371 mutex_exit(&airq_mutex); 1372 return (0); 1373 } 1374 apic_vector_to_irq[vector] = (uchar_t)irqno; 1375 msi_vector_to_pirq[vector] = (uchar_t)irqno; 1376 apic_max_device_irq = max(irqno, apic_max_device_irq); 1377 apic_min_device_irq = min(irqno, apic_min_device_irq); 1378 irqptr = apic_irq_table[irqno]; 1379 ASSERT(irqptr != NULL); 1380 irqptr->airq_vector = (uchar_t)vector; 1381 irqptr->airq_ipl = pri; 1382 irqptr->airq_origirq = (uchar_t)(inum + i); 1383 irqptr->airq_share_id = 0; 1384 irqptr->airq_mps_intr_index = MSIX_INDEX; 1385 irqptr->airq_dip = dip; 1386 irqptr->airq_major = major; 1387 irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */ 1388 } 1389 out: 1390 mutex_exit(&airq_mutex); 1391 return (rcount); 1392 } 1393 1394 1395 /* 1396 * This finds the apic_irq_t associated with the dip, ispec and type. 1397 * The entry should have already been freed, but it can not have been 1398 * reused yet since the hypervisor can not have reassigned the pirq since 1399 * we have not freed that yet. 1400 */ 1401 static apic_irq_t * 1402 msi_find_irq(dev_info_t *dip, struct intrspec *ispec) 1403 { 1404 apic_irq_t *irqp; 1405 int i; 1406 1407 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 1408 if ((irqp = apic_irq_table[i]) == NULL) 1409 continue; 1410 if ((irqp->airq_dip == dip) && 1411 (irqp->airq_origirq == ispec->intrspec_vec) && 1412 (irqp->airq_ipl == ispec->intrspec_pri)) { 1413 return (irqp); 1414 } 1415 } 1416 return (NULL); 1417 } 1418 1419 void 1420 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type) 1421 { 1422 int i, rc; 1423 physdev_unmap_pirq_t unmap_pirq; 1424 apic_irq_t *irqptr; 1425 struct intrspec ispec; 1426 1427 DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x " 1428 "count: %x pri: %x type: %x\n", 1429 (void *)dip, inum, count, pri, type)); 1430 1431 /* for MSI/X only */ 1432 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) 1433 return; 1434 1435 for (i = 0; i < count; i++) { 1436 DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x " 1437 "pri=0x%x count=0x%x\n", inum, pri, count)); 1438 ispec.intrspec_vec = inum + i; 1439 ispec.intrspec_pri = pri; 1440 if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) { 1441 cmn_err(CE_WARN, 1442 "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x", 1443 ddi_get_name(dip), ddi_get_name_addr(dip), 1444 (void *)dip, inum + i, pri); 1445 continue; 1446 } 1447 /* 1448 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq 1449 */ 1450 unmap_pirq.domid = DOMID_SELF; 1451 unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector]; 1452 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq); 1453 if (rc < 0) { 1454 cmn_err(CE_WARN, "unmap pirq failed"); 1455 return; 1456 } 1457 irqptr->airq_mps_intr_index = FREE_INDEX; 1458 apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ; 1459 } 1460 } 1461 1462 /* 1463 * The hypervisor doesn't permit access to local apics directly 1464 */ 1465 /* ARGSUSED */ 1466 uint32_t * 1467 mapin_apic(uint32_t addr, size_t len, int flags) 1468 { 1469 /* 1470 * Return a pointer to a memory area to fake out the 1471 * probe code that wants to read apic registers. 1472 * The dummy values will end up being ignored by xen 1473 * later on when they are used anyway. 1474 */ 1475 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1476 return (xen_psm_dummy_apic); 1477 } 1478 1479 /* ARGSUSED */ 1480 uint32_t * 1481 mapin_ioapic(uint32_t addr, size_t len, int flags) 1482 { 1483 /* 1484 * Return non-null here to fake out configure code that calls this. 1485 * The i86xpv platform will not reference through the returned value.. 1486 */ 1487 return ((uint32_t *)0x1); 1488 } 1489 1490 /* ARGSUSED */ 1491 void 1492 mapout_apic(caddr_t addr, size_t len) 1493 { 1494 } 1495 1496 /* ARGSUSED */ 1497 void 1498 mapout_ioapic(caddr_t addr, size_t len) 1499 { 1500 } 1501 1502 uint32_t 1503 ioapic_read(int apic_ix, uint32_t reg) 1504 { 1505 physdev_apic_t apic; 1506 1507 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1508 apic.reg = reg; 1509 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1510 panic("read ioapic %d reg %d failed", apic_ix, reg); 1511 return (apic.value); 1512 } 1513 1514 void 1515 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1516 { 1517 physdev_apic_t apic; 1518 1519 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1520 apic.reg = reg; 1521 apic.value = value; 1522 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1523 panic("write ioapic %d reg %d failed", apic_ix, reg); 1524 } 1525 1526 /* 1527 * This function was added as part of x2APIC support in pcplusmp. 1528 */ 1529 void 1530 ioapic_write_eoi(int apic_ix, uint32_t value) 1531 { 1532 physdev_apic_t apic; 1533 1534 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1535 apic.reg = APIC_IO_EOI; 1536 apic.value = value; 1537 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1538 panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix); 1539 } 1540 1541 /* 1542 * This function was added as part of x2APIC support in pcplusmp to resolve 1543 * undefined symbol in xpv_psm. 1544 */ 1545 void 1546 x2apic_update_psm() 1547 { 1548 } 1549 1550 /* 1551 * This function was added as part of x2APIC support in pcplusmp to resolve 1552 * undefined symbol in xpv_psm. 1553 */ 1554 void 1555 apic_ret() 1556 { 1557 } 1558 1559 /* 1560 * Call rebind to do the actual programming. 1561 */ 1562 int 1563 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1564 { 1565 apic_irq_t *irqptr; 1566 struct ioapic_reprogram_data *drep = NULL; 1567 int rv, cpu; 1568 cpuset_t cpus; 1569 1570 if (deferred) { 1571 drep = (struct ioapic_reprogram_data *)p; 1572 ASSERT(drep != NULL); 1573 irqptr = drep->irqp; 1574 } else { 1575 irqptr = (apic_irq_t *)p; 1576 } 1577 ASSERT(irqptr != NULL); 1578 /* 1579 * Set cpu based on xen idea of online cpu's not apic tables. 1580 * Note that xen ignores/sets to it's own preferred value the 1581 * target cpu field when programming ioapic anyway. 1582 */ 1583 if (irqptr->airq_mps_intr_index == MSI_INDEX) 1584 cpu = irqptr->airq_cpu; /* MSI cpus are already set */ 1585 else { 1586 cpu = xen_psm_bind_intr(irq); 1587 irqptr->airq_cpu = cpu; 1588 } 1589 if (cpu == IRQ_UNBOUND) { 1590 CPUSET_ZERO(cpus); 1591 CPUSET_OR(cpus, xen_psm_cpus_online); 1592 } else { 1593 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1594 } 1595 rv = apic_rebind(irqptr, cpu, drep); 1596 if (rv) { 1597 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1598 cpu = 0; 1599 irqptr->airq_cpu = cpu; 1600 rv = apic_rebind(irqptr, cpu, drep); 1601 } 1602 /* 1603 * If rebind successful bind the irq to an event channel 1604 */ 1605 if (rv == 0) { 1606 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1607 CPUSET_FIND(cpus, cpu); 1608 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1609 } 1610 return (rv); 1611 } 1612 1613 /* 1614 * Allocate a new vector for the given irq 1615 */ 1616 /* ARGSUSED */ 1617 uchar_t 1618 apic_modify_vector(uchar_t vector, int irq) 1619 { 1620 return (apic_allocate_vector(0, irq, 0)); 1621 } 1622 1623 /* 1624 * The rest of the file is just generic psm module boilerplate 1625 */ 1626 1627 static struct psm_ops xen_psm_ops = { 1628 xen_psm_probe, /* psm_probe */ 1629 1630 xen_psm_softinit, /* psm_init */ 1631 xen_psm_picinit, /* psm_picinit */ 1632 xen_psm_intr_enter, /* psm_intr_enter */ 1633 xen_psm_intr_exit, /* psm_intr_exit */ 1634 xen_psm_setspl, /* psm_setspl */ 1635 xen_psm_addspl, /* psm_addspl */ 1636 xen_psm_delspl, /* psm_delspl */ 1637 xen_psm_disable_intr, /* psm_disable_intr */ 1638 xen_psm_enable_intr, /* psm_enable_intr */ 1639 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1640 (void (*)(int))NULL, /* psm_set_softintr */ 1641 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1642 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1643 1644 xen_psm_clkinit, /* psm_clkinit */ 1645 xen_psm_get_clockirq, /* psm_get_clockirq */ 1646 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1647 xpv_gethrtime, /* psm_gethrtime */ 1648 1649 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1650 xen_psm_cpu_start, /* psm_cpu_start */ 1651 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1652 xen_psm_shutdown, /* psm_shutdown */ 1653 xen_psm_get_ipivect, /* psm_get_ipivect */ 1654 xen_psm_send_ipi, /* psm_send_ipi */ 1655 1656 xen_psm_translate_irq, /* psm_translate_irq */ 1657 1658 (void (*)(int, char *))NULL, /* psm_notify_error */ 1659 (void (*)(int msg))NULL, /* psm_notify_func */ 1660 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1661 xen_psm_timer_enable, /* psm_timer_enable */ 1662 xen_psm_timer_disable, /* psm_timer_disable */ 1663 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1664 (void (*)(int, int))NULL, /* psm_preshutdown */ 1665 xen_intr_ops, /* Advanced DDI Interrupt framework */ 1666 (int (*)(psm_state_request_t *))NULL /* psm_state */ 1667 }; 1668 1669 static struct psm_info xen_psm_info = { 1670 PSM_INFO_VER01_5, /* version */ 1671 PSM_OWN_EXCLUSIVE, /* ownership */ 1672 &xen_psm_ops, /* operation */ 1673 "xVM_psm", /* machine name */ 1674 "platform module" /* machine descriptions */ 1675 }; 1676 1677 static void *xen_psm_hdlp; 1678 1679 int 1680 _init(void) 1681 { 1682 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1683 } 1684 1685 int 1686 _fini(void) 1687 { 1688 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1689 } 1690 1691 int 1692 _info(struct modinfo *modinfop) 1693 { 1694 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1695 } 1696