1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #define PSMI_1_6 28 29 #include <sys/mutex.h> 30 #include <sys/types.h> 31 #include <sys/time.h> 32 #include <sys/clock.h> 33 #include <sys/machlock.h> 34 #include <sys/smp_impldefs.h> 35 #include <sys/uadmin.h> 36 #include <sys/promif.h> 37 #include <sys/psm.h> 38 #include <sys/psm_common.h> 39 #include <sys/atomic.h> 40 #include <sys/apic.h> 41 #include <sys/archsystm.h> 42 #include <sys/mach_intr.h> 43 #include <sys/hypervisor.h> 44 #include <sys/evtchn_impl.h> 45 #include <sys/modctl.h> 46 #include <sys/trap.h> 47 #include <sys/panic.h> 48 #include <sys/sysmacros.h> 49 #include <sys/pci_intr_lib.h> 50 #include <vm/hat_i86.h> 51 52 #include <xen/public/vcpu.h> 53 #include <xen/public/physdev.h> 54 55 56 /* 57 * Global Data 58 */ 59 60 int xen_psm_verbose = 0; 61 62 /* As of now we don't support x2apic in xVM */ 63 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 64 int apic_error = 0; 65 int apic_verbose = 0; 66 cpuset_t apic_cpumask; 67 int apic_forceload = 0; 68 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 69 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 70 }; 71 uchar_t apic_ipltopri[MAXIPL + 1]; 72 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 73 uint_t apic_picinit_called; 74 apic_cpus_info_t *apic_cpus; 75 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 76 /* use to make sure only one cpu handles the nmi */ 77 static lock_t xen_psm_nmi_lock; 78 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 79 int xen_psm_panic_on_nmi = 0; 80 int xen_psm_num_nmis = 0; 81 82 cpuset_t xen_psm_cpus_online; /* online cpus */ 83 int xen_psm_ncpus = 1; /* cpu count */ 84 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 85 86 int xen_support_msi = -1; 87 88 static int xen_clock_irq = INVALID_IRQ; 89 90 /* flag definitions for xen_psm_verbose */ 91 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 92 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 93 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 94 95 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 96 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 97 cmn_err fmt; 98 99 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 100 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 101 prom_printf fmt; 102 103 /* 104 * Dummy apic array to point common routines at that want to do some apic 105 * manipulation. Xen doesn't allow guest apic access so we point at these 106 * memory locations to fake out those who want to do apic fiddling. 107 */ 108 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 109 110 static struct psm_info xen_psm_info; 111 static void xen_psm_setspl(int); 112 113 int 114 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 115 int behavior); 116 int 117 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 118 int behavior); 119 120 /* 121 * Local support routines 122 */ 123 124 /* 125 * Select vcpu to bind xen virtual device interrupt to. 126 */ 127 /*ARGSUSED*/ 128 int 129 xen_psm_bind_intr(int irq) 130 { 131 int bind_cpu; 132 apic_irq_t *irqptr; 133 134 bind_cpu = IRQ_UNBOUND; 135 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 136 return (bind_cpu); 137 if (irq <= APIC_MAX_VECTOR) 138 irqptr = apic_irq_table[irq]; 139 else 140 irqptr = NULL; 141 if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND)) 142 bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND; 143 if (bind_cpu != IRQ_UNBOUND) { 144 if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)) 145 bind_cpu = 0; 146 goto done; 147 } 148 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 149 do { 150 bind_cpu = xen_psm_next_bind_cpu++; 151 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 152 xen_psm_next_bind_cpu = 0; 153 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 154 } else { 155 bind_cpu = 0; 156 } 157 done: 158 return (bind_cpu); 159 } 160 161 /* 162 * Autoconfiguration Routines 163 */ 164 165 static int 166 xen_psm_probe(void) 167 { 168 int ret = PSM_SUCCESS; 169 170 if (DOMAIN_IS_INITDOMAIN(xen_info)) 171 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 172 return (ret); 173 } 174 175 static void 176 xen_psm_softinit(void) 177 { 178 /* LINTED logical expression always true: op "||" */ 179 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 180 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 181 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 182 apic_init_common(); 183 } 184 } 185 186 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 187 188 /*ARGSUSED*/ 189 static int 190 xen_psm_clkinit(int hertz) 191 { 192 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 193 extern int dosynctodr; 194 195 /* 196 * domU cannot set the TOD hardware, fault the TOD clock now to 197 * indicate that and turn off attempts to sync TOD hardware 198 * with the hires timer. 199 */ 200 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 201 mutex_enter(&tod_lock); 202 (void) tod_fault(TOD_RDONLY, 0); 203 dosynctodr = 0; 204 mutex_exit(&tod_lock); 205 } 206 /* 207 * The hypervisor provides a timer based on the local APIC timer. 208 * The interface supports requests of nanosecond resolution. 209 * A common frequency of the apic clock is 100 Mhz which 210 * gives a resolution of 10 nsec per tick. What we would really like 211 * is a way to get the ns per tick value from xen. 212 * XXPV - This is an assumption that needs checking and may change 213 */ 214 return (XEN_NSEC_PER_TICK); 215 } 216 217 static void 218 xen_psm_hrtimeinit(void) 219 { 220 extern int gethrtime_hires; 221 gethrtime_hires = 1; 222 } 223 224 /* xen_psm NMI handler */ 225 /*ARGSUSED*/ 226 static void 227 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 228 { 229 xen_psm_num_nmis++; 230 231 if (!lock_try(&xen_psm_nmi_lock)) 232 return; 233 234 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 235 debug_enter("NMI received: entering kmdb\n"); 236 } else if (xen_psm_panic_on_nmi) { 237 /* Keep panic from entering kmdb. */ 238 nopanicdebug = 1; 239 panic("NMI received\n"); 240 } else { 241 /* 242 * prom_printf is the best shot we have of something which is 243 * problem free from high level/NMI type of interrupts 244 */ 245 prom_printf("NMI received\n"); 246 } 247 248 lock_clear(&xen_psm_nmi_lock); 249 } 250 251 static void 252 xen_psm_picinit() 253 { 254 int cpu, irqno; 255 cpuset_t cpus; 256 257 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 258 /* set a flag so we know we have run xen_psm_picinit() */ 259 apic_picinit_called = 1; 260 LOCK_INIT_CLEAR(&apic_ioapic_lock); 261 262 /* XXPV - do we need to do this? */ 263 picsetup(); /* initialise the 8259 */ 264 265 /* enable apic mode if imcr present */ 266 /* XXPV - do we need to do this either? */ 267 if (apic_imcrp) { 268 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 269 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 270 } 271 272 ioapic_init_intr(IOAPIC_NOMASK); 273 /* 274 * We never called xen_psm_addspl() when the SCI 275 * interrupt was added because that happened before the 276 * PSM module was loaded. Fix that up here by doing 277 * any missed operations (e.g. bind to CPU) 278 */ 279 if ((irqno = apic_sci_vect) > 0) { 280 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 281 CPUSET_ZERO(cpus); 282 CPUSET_OR(cpus, xen_psm_cpus_online); 283 } else { 284 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 285 } 286 ec_set_irq_affinity(irqno, cpus); 287 apic_irq_table[irqno]->airq_temp_cpu = 288 (uchar_t)(cpu & ~IRQ_USER_BOUND); 289 ec_enable_irq(irqno); 290 } 291 } 292 293 /* add nmi handler - least priority nmi handler */ 294 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 295 296 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 297 "xVM_psm NMI handler", (caddr_t)NULL)) 298 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 299 } 300 301 302 /* 303 * generates an interprocessor interrupt to another CPU 304 */ 305 static void 306 xen_psm_send_ipi(int cpun, int ipl) 307 { 308 ulong_t flag = intr_clear(); 309 310 ec_send_ipi(ipl, cpun); 311 intr_restore(flag); 312 } 313 314 /*ARGSUSED*/ 315 static int 316 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 317 { 318 int cpu, ret; 319 cpuset_t cpus; 320 321 /* 322 * We are called at splhi() so we can't call anything that might end 323 * up trying to context switch. 324 */ 325 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 326 DOMAIN_IS_INITDOMAIN(xen_info)) { 327 /* 328 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 329 */ 330 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 331 } else { 332 /* 333 * Set priority/affinity/enable for non PIRQs 334 */ 335 ret = ec_set_irq_priority(irqno, ipl); 336 ASSERT(ret == 0); 337 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 338 CPUSET_ZERO(cpus); 339 CPUSET_OR(cpus, xen_psm_cpus_online); 340 } else { 341 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 342 } 343 ec_set_irq_affinity(irqno, cpus); 344 ec_enable_irq(irqno); 345 } 346 return (ret); 347 } 348 349 /* 350 * Acquire ownership of this irq on this cpu 351 */ 352 void 353 xen_psm_acquire_irq(int irq) 354 { 355 ulong_t flags; 356 int cpuid; 357 358 /* 359 * If the irq is currently being serviced by another cpu 360 * we busy-wait for the other cpu to finish. Take any 361 * pending interrupts before retrying. 362 */ 363 do { 364 flags = intr_clear(); 365 cpuid = ec_block_irq(irq); 366 intr_restore(flags); 367 } while (cpuid != CPU->cpu_id); 368 } 369 370 /*ARGSUSED*/ 371 static int 372 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 373 { 374 apic_irq_t *irqptr; 375 int err = PSM_SUCCESS; 376 377 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 378 DOMAIN_IS_INITDOMAIN(xen_info)) { 379 irqptr = apic_irq_table[irqno]; 380 /* 381 * unbind if no more sharers of this irq/evtchn 382 */ 383 if (irqptr->airq_share == 1) { 384 xen_psm_acquire_irq(irqno); 385 ec_unbind_irq(irqno); 386 } 387 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 388 /* 389 * If still in use reset priority 390 */ 391 if (!err && irqptr->airq_share != 0) { 392 err = ec_set_irq_priority(irqno, max_ipl); 393 return (err); 394 } 395 } else { 396 xen_psm_acquire_irq(irqno); 397 ec_unbind_irq(irqno); 398 } 399 return (err); 400 } 401 402 static processorid_t 403 xen_psm_get_next_processorid(processorid_t id) 404 { 405 if (id == -1) 406 return (0); 407 408 for (id++; id < NCPU; id++) { 409 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 410 case 0: /* yeah, that one's there */ 411 return (id); 412 default: 413 case X_EINVAL: /* out of range */ 414 return (-1); 415 case X_ENOENT: /* not present in the domain */ 416 /* 417 * It's not clear that we -need- to keep looking 418 * at this point, if, e.g., we can guarantee 419 * the hypervisor always keeps a contiguous range 420 * of vcpus around this is equivalent to "out of range". 421 * 422 * But it would be sad to miss a vcpu we're 423 * supposed to be using .. 424 */ 425 break; 426 } 427 } 428 429 return (-1); 430 } 431 432 /* 433 * XXPV - undo the start cpu op change; return to ignoring this value 434 * - also tweak error handling in main startup loop 435 */ 436 /*ARGSUSED*/ 437 static int 438 xen_psm_cpu_start(processorid_t id, caddr_t arg) 439 { 440 int ret; 441 442 ASSERT(id > 0); 443 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 444 ec_bind_cpu_ipis(id); 445 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 446 if ((ret = xen_vcpu_up(id)) == 0) 447 xen_psm_ncpus++; 448 else 449 ret = EINVAL; 450 return (ret); 451 } 452 453 /* 454 * Allocate an irq for inter cpu signaling 455 */ 456 /*ARGSUSED*/ 457 static int 458 xen_psm_get_ipivect(int ipl, int type) 459 { 460 return (ec_bind_ipi_to_irq(ipl, 0)); 461 } 462 463 /*ARGSUSED*/ 464 static int 465 xen_psm_get_clockirq(int ipl) 466 { 467 if (xen_clock_irq != INVALID_IRQ) 468 return (xen_clock_irq); 469 470 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 471 return (xen_clock_irq); 472 } 473 474 /*ARGSUSED*/ 475 static void 476 xen_psm_shutdown(int cmd, int fcn) 477 { 478 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 479 480 switch (cmd) { 481 case A_SHUTDOWN: 482 switch (fcn) { 483 case AD_BOOT: 484 case AD_IBOOT: 485 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 486 break; 487 case AD_POWEROFF: 488 /* fall through if domU or if poweroff fails */ 489 if (DOMAIN_IS_INITDOMAIN(xen_info)) 490 if (apic_enable_acpi) 491 (void) acpi_poweroff(); 492 /* FALLTHRU */ 493 case AD_HALT: 494 default: 495 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 496 break; 497 } 498 break; 499 case A_REBOOT: 500 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 501 break; 502 default: 503 return; 504 } 505 } 506 507 508 static int 509 xen_psm_translate_irq(dev_info_t *dip, int irqno) 510 { 511 if (dip == NULL) { 512 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 513 " dip = NULL\n", irqno)); 514 return (irqno); 515 } 516 return (irqno); 517 } 518 519 /* 520 * xen_psm_intr_enter() acks the event that triggered the interrupt and 521 * returns the new priority level, 522 */ 523 /*ARGSUSED*/ 524 static int 525 xen_psm_intr_enter(int ipl, int *vector) 526 { 527 int newipl; 528 uint_t intno; 529 cpu_t *cpu = CPU; 530 531 intno = (*vector); 532 533 ASSERT(intno < NR_IRQS); 534 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 535 536 if (!ec_is_edge_pirq(intno)) 537 ec_clear_irq(intno); 538 539 newipl = autovect[intno].avh_hi_pri; 540 if (newipl == 0) { 541 /* 542 * (newipl == 0) means we have no service routines for this 543 * vector. We will treat this as a spurious interrupt. 544 * We have cleared the pending bit already, clear the event 545 * mask and return a spurious interrupt. This case can happen 546 * when an interrupt delivery is racing with the removal of 547 * of the service routine for that interrupt. 548 */ 549 ec_unmask_irq(intno); 550 newipl = -1; /* flag spurious interrupt */ 551 } else if (newipl <= cpu->cpu_pri) { 552 /* 553 * (newipl <= cpu->cpu_pri) means that we must be trying to 554 * service a vector that was shared with a higher priority 555 * isr. The higher priority handler has been removed and 556 * we need to service this int. We can't return a lower 557 * priority than current cpu priority. Just synthesize a 558 * priority to return that should be acceptable. 559 */ 560 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 561 } 562 return (newipl); 563 } 564 565 566 /* 567 * xen_psm_intr_exit() restores the old interrupt 568 * priority level after processing an interrupt. 569 * It is called with interrupts disabled, and does not enable interrupts. 570 */ 571 /* ARGSUSED */ 572 static void 573 xen_psm_intr_exit(int ipl, int vector) 574 { 575 ec_try_unmask_irq(vector); 576 xen_psm_setspl(ipl); 577 } 578 579 intr_exit_fn_t 580 psm_intr_exit_fn(void) 581 { 582 return (xen_psm_intr_exit); 583 } 584 585 /* 586 * Check if new ipl level allows delivery of previously unserviced events 587 */ 588 static void 589 xen_psm_setspl(int ipl) 590 { 591 struct cpu *cpu = CPU; 592 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 593 uint16_t pending; 594 595 ASSERT(vci->evtchn_upcall_mask != 0); 596 597 /* 598 * If new ipl level will enable any pending interrupts, setup so the 599 * upcoming sti will cause us to get an upcall. 600 */ 601 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 602 if (pending) { 603 int i; 604 ulong_t pending_sels = 0; 605 volatile ulong_t *selp; 606 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 607 608 for (i = bsrw_insn(pending); i > ipl; i--) 609 pending_sels |= cpe->pending_sel[i]; 610 ASSERT(pending_sels); 611 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 612 atomic_or_ulong(selp, pending_sels); 613 vci->evtchn_upcall_pending = 1; 614 } 615 } 616 617 /* 618 * This function provides external interface to the nexus for all 619 * functionality related to the new DDI interrupt framework. 620 * 621 * Input: 622 * dip - pointer to the dev_info structure of the requested device 623 * hdlp - pointer to the internal interrupt handle structure for the 624 * requested interrupt 625 * intr_op - opcode for this call 626 * result - pointer to the integer that will hold the result to be 627 * passed back if return value is PSM_SUCCESS 628 * 629 * Output: 630 * return value is either PSM_SUCCESS or PSM_FAILURE 631 */ 632 int 633 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 634 psm_intr_op_t intr_op, int *result) 635 { 636 int cap; 637 int err; 638 int new_priority; 639 apic_irq_t *irqp; 640 struct intrspec *ispec; 641 642 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 643 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 644 645 switch (intr_op) { 646 case PSM_INTR_OP_CHECK_MSI: 647 /* 648 * Till PCI passthru is supported, only dom0 has MSI/MSIX 649 */ 650 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 651 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 652 DDI_INTR_TYPE_MSIX); 653 break; 654 } 655 /* 656 * Check MSI/X is supported or not at APIC level and 657 * masked off the MSI/X bits in hdlp->ih_type if not 658 * supported before return. If MSI/X is supported, 659 * leave the ih_type unchanged and return. 660 * 661 * hdlp->ih_type passed in from the nexus has all the 662 * interrupt types supported by the device. 663 */ 664 if (xen_support_msi == 0) { 665 /* 666 * if xen_support_msi is not set, call 667 * apic_check_msi_support() to check whether msi 668 * is supported first 669 */ 670 if (apic_check_msi_support() == PSM_SUCCESS) 671 xen_support_msi = 1; 672 else 673 xen_support_msi = -1; 674 } 675 if (xen_support_msi == 1) 676 *result = hdlp->ih_type; 677 else 678 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 679 DDI_INTR_TYPE_MSIX); 680 break; 681 case PSM_INTR_OP_ALLOC_VECTORS: 682 if (hdlp->ih_type == DDI_INTR_TYPE_MSI) 683 *result = apic_alloc_msi_vectors(dip, hdlp->ih_inum, 684 hdlp->ih_scratch1, hdlp->ih_pri, 685 (int)(uintptr_t)hdlp->ih_scratch2); 686 else 687 *result = apic_alloc_msix_vectors(dip, hdlp->ih_inum, 688 hdlp->ih_scratch1, hdlp->ih_pri, 689 (int)(uintptr_t)hdlp->ih_scratch2); 690 break; 691 case PSM_INTR_OP_FREE_VECTORS: 692 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 693 hdlp->ih_pri, hdlp->ih_type); 694 break; 695 case PSM_INTR_OP_NAVAIL_VECTORS: 696 /* 697 * XXPV - maybe we should make this be: 698 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 699 */ 700 if (DOMAIN_IS_INITDOMAIN(xen_info)) 701 *result = APIC_VECTOR_PER_IPL; 702 else 703 *result = 1; 704 break; 705 case PSM_INTR_OP_XLATE_VECTOR: 706 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 707 if (ispec->intrspec_vec >= PIRQ_BASE && 708 ispec->intrspec_vec < NR_PIRQS && 709 DOMAIN_IS_INITDOMAIN(xen_info)) { 710 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 711 } else { 712 *result = ispec->intrspec_vec; 713 } 714 break; 715 case PSM_INTR_OP_GET_PENDING: 716 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 717 *result = ec_pending_irq(hdlp->ih_vector); 718 break; 719 case PSM_INTR_OP_CLEAR_MASK: 720 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 721 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 722 return (PSM_FAILURE); 723 ec_enable_irq(hdlp->ih_vector); 724 break; 725 case PSM_INTR_OP_SET_MASK: 726 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 727 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 728 return (PSM_FAILURE); 729 ec_disable_irq(hdlp->ih_vector); 730 break; 731 case PSM_INTR_OP_GET_CAP: 732 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 733 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 734 cap |= DDI_INTR_FLAG_MASKABLE; 735 *result = cap; 736 break; 737 case PSM_INTR_OP_GET_SHARED: 738 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 739 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 740 return (PSM_FAILURE); 741 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 742 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 743 == NULL) 744 return (PSM_FAILURE); 745 *result = (irqp->airq_share > 1) ? 1: 0; 746 } else { 747 return (PSM_FAILURE); 748 } 749 break; 750 case PSM_INTR_OP_SET_PRI: 751 new_priority = *(int *)result; 752 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 753 if (err != 0) 754 return (PSM_FAILURE); 755 break; 756 case PSM_INTR_OP_GET_INTR: 757 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 758 return (PSM_FAILURE); 759 /* 760 * The interrupt handle given here has been allocated 761 * specifically for this command, and ih_private carries 762 * a pointer to a apic_get_intr_t. 763 */ 764 if (apic_get_vector_intr_info( 765 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 766 return (PSM_FAILURE); 767 break; 768 case PSM_INTR_OP_SET_CAP: 769 /* FALLTHRU */ 770 default: 771 return (PSM_FAILURE); 772 } 773 return (PSM_SUCCESS); 774 } 775 776 static void 777 xen_psm_rebind_irq(int irq) 778 { 779 cpuset_t ncpu; 780 processorid_t newcpu; 781 apic_irq_t *irqptr; 782 783 newcpu = xen_psm_bind_intr(irq); 784 if (newcpu == IRQ_UNBOUND) { 785 CPUSET_ZERO(ncpu); 786 CPUSET_OR(ncpu, xen_psm_cpus_online); 787 } else { 788 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 789 } 790 ec_set_irq_affinity(irq, ncpu); 791 if (irq <= APIC_MAX_VECTOR) { 792 irqptr = apic_irq_table[irq]; 793 ASSERT(irqptr != NULL); 794 irqptr->airq_temp_cpu = (uchar_t)newcpu; 795 } 796 } 797 798 /* 799 * Disable all device interrupts for the given cpu. 800 * High priority interrupts are not disabled and will still be serviced. 801 */ 802 static int 803 xen_psm_disable_intr(processorid_t cpun) 804 { 805 int irq; 806 807 /* 808 * Can't offline VCPU 0 on this hypervisor. There's no reason 809 * anyone would want to given that the CPUs are virtual. Also note 810 * that the hypervisor requires suspend/resume to be on VCPU 0. 811 */ 812 if (cpun == 0) 813 return (PSM_FAILURE); 814 815 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 816 for (irq = 0; irq < NR_IRQS; irq++) { 817 if (!ec_irq_needs_rebind(irq, cpun)) 818 continue; 819 xen_psm_rebind_irq(irq); 820 } 821 return (PSM_SUCCESS); 822 } 823 824 static void 825 xen_psm_enable_intr(processorid_t cpun) 826 { 827 int irq; 828 829 if (cpun == 0) 830 return; 831 832 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 833 834 /* 835 * Rebalance device interrupts among online processors 836 */ 837 for (irq = 0; irq < NR_IRQS; irq++) { 838 if (!ec_irq_rebindable(irq)) 839 continue; 840 xen_psm_rebind_irq(irq); 841 } 842 843 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 844 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 845 } 846 } 847 848 static int 849 xen_psm_post_cpu_start() 850 { 851 processorid_t cpun; 852 853 cpun = psm_get_cpu_id(); 854 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 855 /* 856 * Non-virtualized environments can call psm_post_cpu_start 857 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. 858 * xen_psm_post_cpu_start() is only called from boot. 859 */ 860 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; 861 } 862 return (PSM_SUCCESS); 863 } 864 865 /* 866 * This function will reprogram the timer. 867 * 868 * When in oneshot mode the argument is the absolute time in future at which to 869 * generate the interrupt. 870 * 871 * When in periodic mode, the argument is the interval at which the 872 * interrupts should be generated. There is no need to support the periodic 873 * mode timer change at this time. 874 * 875 * Note that we must be careful to convert from hrtime to Xen system time (see 876 * xpv_timestamp.c). 877 */ 878 static void 879 xen_psm_timer_reprogram(hrtime_t timer_req) 880 { 881 hrtime_t now, timer_new, time_delta, xen_time; 882 ulong_t flags; 883 884 flags = intr_clear(); 885 /* 886 * We should be called from high PIL context (CBE_HIGH_PIL), 887 * so kpreempt is disabled. 888 */ 889 890 now = xpv_gethrtime(); 891 xen_time = xpv_getsystime(); 892 if (timer_req <= now) { 893 /* 894 * requested to generate an interrupt in the past 895 * generate an interrupt as soon as possible 896 */ 897 time_delta = XEN_NSEC_PER_TICK; 898 } else 899 time_delta = timer_req - now; 900 901 timer_new = xen_time + time_delta; 902 if (HYPERVISOR_set_timer_op(timer_new) != 0) 903 panic("can't set hypervisor timer?"); 904 intr_restore(flags); 905 } 906 907 /* 908 * This function will enable timer interrupts. 909 */ 910 static void 911 xen_psm_timer_enable(void) 912 { 913 ec_unmask_irq(xen_clock_irq); 914 } 915 916 /* 917 * This function will disable timer interrupts on the current cpu. 918 */ 919 static void 920 xen_psm_timer_disable(void) 921 { 922 (void) ec_block_irq(xen_clock_irq); 923 /* 924 * If the clock irq is pending on this cpu then we need to 925 * clear the pending interrupt. 926 */ 927 ec_unpend_irq(xen_clock_irq); 928 } 929 930 /* 931 * 932 * The following functions are in the platform specific file so that they 933 * can be different functions depending on whether we are running on 934 * bare metal or a hypervisor. 935 */ 936 937 /* 938 * Allocate a free vector for irq at ipl. 939 */ 940 /* ARGSUSED */ 941 uchar_t 942 apic_allocate_vector(int ipl, int irq, int pri) 943 { 944 physdev_irq_t irq_op; 945 uchar_t vector; 946 int rc; 947 948 irq_op.irq = irq; 949 950 if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 951 != 0) 952 panic("Hypervisor alloc vector failed err: %d", -rc); 953 vector = irq_op.vector; 954 /* 955 * No need to worry about vector colliding with our reserved vectors 956 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 957 * generated traps and handle them properly. 958 */ 959 apic_vector_to_irq[vector] = (uchar_t)irq; 960 return (vector); 961 } 962 963 /* Mark vector as not being used by any irq */ 964 void 965 apic_free_vector(uchar_t vector) 966 { 967 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 968 } 969 970 /* 971 * This function returns the no. of vectors available for the pri. 972 * dip is not used at this moment. If we really don't need that, 973 * it will be removed. Since priority is not limited by hardware 974 * when running on the hypervisor we simply return the maximum no. 975 * of available contiguous vectors. 976 */ 977 /*ARGSUSED*/ 978 int 979 apic_navail_vector(dev_info_t *dip, int pri) 980 { 981 int lowest, highest, i, navail, count; 982 983 DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n", 984 (void *)dip, pri)); 985 986 highest = APIC_MAX_VECTOR; 987 lowest = APIC_BASE_VECT; 988 navail = count = 0; 989 990 /* It has to be contiguous */ 991 for (i = lowest; i < highest; i++) { 992 count = 0; 993 while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) && 994 (i < highest)) { 995 count++; 996 i++; 997 } 998 if (count > navail) 999 navail = count; 1000 } 1001 return (navail); 1002 } 1003 1004 static physdev_manage_pci_t *managed_devlist; 1005 static int mdev_cnt; 1006 static int mdev_size = 128; 1007 static uchar_t msi_vector_to_pirq[APIC_MAX_VECTOR+1]; 1008 1009 /* 1010 * Add devfn on given bus to devices managed by hypervisor 1011 */ 1012 static int 1013 xen_manage_device(uint8_t bus, uint8_t devfn) 1014 { 1015 physdev_manage_pci_t manage_pci, *newlist; 1016 int rc, i, oldsize; 1017 1018 /* 1019 * Check if bus/devfn already managed. If so just return success. 1020 */ 1021 if (managed_devlist == NULL) { 1022 managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) * 1023 mdev_size, KM_NOSLEEP); 1024 if (managed_devlist == NULL) { 1025 cmn_err(CE_WARN, 1026 "Can't alloc space for managed device list"); 1027 return (0); 1028 } 1029 }; 1030 for (i = 0; i < mdev_cnt; i++) { 1031 if (managed_devlist[i].bus == bus && 1032 managed_devlist[i].devfn == devfn) 1033 return (1); /* device already managed */ 1034 } 1035 manage_pci.bus = bus; 1036 manage_pci.devfn = devfn; 1037 rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); 1038 if (rc < 0) { 1039 cmn_err(CE_WARN, 1040 "hypervisor add pci device call failed bus:0x%x" 1041 " devfn:0x%x", bus, devfn); 1042 return (0); 1043 } 1044 /* 1045 * Add device to the managed device list 1046 */ 1047 if (i == mdev_size) { 1048 /* 1049 * grow the managed device list 1050 */ 1051 oldsize = mdev_size * sizeof (physdev_manage_pci_t); 1052 mdev_size *= 2; 1053 newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size, 1054 KM_NOSLEEP); 1055 if (newlist == NULL) { 1056 cmn_err(CE_WARN, "Can't grow managed device list"); 1057 return (0); 1058 } 1059 bcopy(managed_devlist, newlist, oldsize); 1060 kmem_free(managed_devlist, oldsize); 1061 managed_devlist = newlist; 1062 } 1063 managed_devlist[i].bus = bus; 1064 managed_devlist[i].devfn = devfn; 1065 mdev_cnt++; 1066 return (1); 1067 } 1068 1069 /* 1070 * allocate an apic irq struct for an MSI interrupt 1071 */ 1072 static int 1073 msi_allocate_irq(int irq) 1074 { 1075 apic_irq_t *irqptr = apic_irq_table[irq]; 1076 1077 if (irqptr == NULL) { 1078 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1079 if (irqptr == NULL) { 1080 cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ"); 1081 return (-1); 1082 } 1083 apic_irq_table[irq] = irqptr; 1084 } else { 1085 if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0) 1086 irqptr->airq_mps_intr_index = FREE_INDEX; 1087 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 1088 cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use"); 1089 return (-1); 1090 } 1091 } 1092 irqptr->airq_mps_intr_index = FREE_INDEX; 1093 return (irq); 1094 } 1095 1096 /* 1097 * read MSI/MSIX vector out of config space 1098 */ 1099 static uchar_t 1100 xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry) 1101 { 1102 uint64_t msi_data = 0; 1103 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip); 1104 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip); 1105 ushort_t msi_ctrl; 1106 uchar_t vector; 1107 1108 ASSERT((handle != NULL) && (cap_ptr != 0)); 1109 if (type == DDI_INTR_TYPE_MSI) { 1110 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1111 /* 1112 * Get vector 1113 */ 1114 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1115 msi_data = pci_config_get16(handle, 1116 cap_ptr + PCI_MSI_64BIT_DATA); 1117 } else { 1118 msi_data = pci_config_get16(handle, 1119 cap_ptr + PCI_MSI_32BIT_DATA); 1120 } 1121 } else if (type == DDI_INTR_TYPE_MSIX) { 1122 uintptr_t off; 1123 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1124 1125 /* Offset into the given entry in the MSI-X table */ 1126 off = (uintptr_t)msix_p->msix_tbl_addr + 1127 (entry * PCI_MSIX_VECTOR_SIZE); 1128 1129 msi_data = ddi_get32(msix_p->msix_tbl_hdl, 1130 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET)); 1131 } 1132 vector = msi_data & 0xff; 1133 return (vector); 1134 } 1135 1136 1137 static void 1138 get_busdevfn(dev_info_t *dip, int *busp, int *devfnp) 1139 { 1140 pci_regspec_t *regspec; 1141 int reglen; 1142 1143 /* 1144 * Get device reg spec, first word has PCI bus and 1145 * device/function info we need. 1146 */ 1147 if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg", 1148 (caddr_t)®spec, ®len) != DDI_SUCCESS) { 1149 cmn_err(CE_WARN, 1150 "get_busdevfn() failed to get regspec."); 1151 return; 1152 } 1153 /* 1154 * get PCI bus # from reg spec for device 1155 */ 1156 *busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi); 1157 /* 1158 * get combined device/function from reg spec for device. 1159 */ 1160 *devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >> 1161 PCI_REG_FUNC_SHIFT; 1162 1163 kmem_free(regspec, reglen); 1164 } 1165 1166 /* 1167 * This function allocates "count" MSI vector(s) for the given "dip/pri/type" 1168 */ 1169 int 1170 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 1171 int behavior) 1172 { 1173 int rcount, i, rc, irqno; 1174 uchar_t vector, cpu; 1175 major_t major; 1176 apic_irq_t *irqptr; 1177 physdev_map_pirq_t map_irq; 1178 int busnum, devfn; 1179 1180 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p " 1181 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 1182 (void *)dip, inum, pri, count, behavior)); 1183 1184 if (count > 1) { 1185 if (behavior == DDI_INTR_ALLOC_STRICT && 1186 apic_multi_msi_enable == 0) 1187 return (0); 1188 if (apic_multi_msi_enable == 0) 1189 count = 1; 1190 } 1191 1192 if ((rcount = apic_navail_vector(dip, pri)) > count) 1193 rcount = count; 1194 else if (rcount == 0 || (rcount < count && 1195 behavior == DDI_INTR_ALLOC_STRICT)) 1196 return (0); 1197 1198 /* if not ISP2, then round it down */ 1199 if (!ISP2(rcount)) 1200 rcount = 1 << (highbit(rcount) - 1); 1201 1202 /* 1203 * get PCI bus # and devfn from reg spec for device 1204 */ 1205 get_busdevfn(dip, &busnum, &devfn); 1206 1207 /* 1208 * Tell xen about this pci device 1209 */ 1210 if (!xen_manage_device(busnum, devfn)) 1211 return (0); 1212 1213 mutex_enter(&airq_mutex); 1214 1215 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1216 for (i = 0; i < rcount; i++) { 1217 /* 1218 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq 1219 */ 1220 map_irq.domid = DOMID_SELF; 1221 map_irq.type = MAP_PIRQ_TYPE_MSI; 1222 map_irq.index = -1; /* hypervisor auto allocates vector */ 1223 map_irq.pirq = -1; 1224 map_irq.bus = busnum; 1225 map_irq.devfn = devfn; 1226 map_irq.entry_nr = 0; 1227 map_irq.table_base = 0; 1228 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1229 irqno = map_irq.pirq; 1230 if (rc < 0) { 1231 mutex_exit(&airq_mutex); 1232 cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1233 return (0); 1234 } 1235 if (irqno < 0) { 1236 mutex_exit(&airq_mutex); 1237 cmn_err(CE_NOTE, 1238 "!hypervisor not configured for MSI support"); 1239 xen_support_msi = -1; 1240 return (0); 1241 } 1242 if (msi_allocate_irq(irqno) < 0) { 1243 mutex_exit(&airq_mutex); 1244 return (0); 1245 } 1246 /* 1247 * Find out what vector the hypervisor assigned 1248 */ 1249 vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, 0); 1250 apic_max_device_irq = max(irqno, apic_max_device_irq); 1251 apic_min_device_irq = min(irqno, apic_min_device_irq); 1252 irqptr = apic_irq_table[irqno]; 1253 ASSERT(irqptr != NULL); 1254 #ifdef DEBUG 1255 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1256 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: " 1257 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1258 #endif 1259 apic_vector_to_irq[vector] = (uchar_t)irqno; 1260 msi_vector_to_pirq[vector] = (uchar_t)irqno; 1261 1262 irqptr->airq_vector = vector; 1263 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1264 irqptr->airq_intin_no = (uchar_t)rcount; 1265 irqptr->airq_ipl = pri; 1266 irqptr->airq_origirq = (uchar_t)(inum + i); 1267 irqptr->airq_share_id = 0; 1268 irqptr->airq_mps_intr_index = MSI_INDEX; 1269 irqptr->airq_dip = dip; 1270 irqptr->airq_major = major; 1271 if (i == 0) /* they all bind to the same cpu */ 1272 cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno); 1273 else 1274 irqptr->airq_cpu = cpu; 1275 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x " 1276 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1277 (void *)irqptr->airq_dip, irqptr->airq_vector, 1278 irqptr->airq_origirq, pri)); 1279 } 1280 mutex_exit(&airq_mutex); 1281 return (rcount); 1282 } 1283 1284 /* 1285 * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type" 1286 */ 1287 int 1288 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 1289 int behavior) 1290 { 1291 int rcount, i, rc; 1292 major_t major; 1293 physdev_map_pirq_t map_irq; 1294 int busnum, devfn; 1295 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1296 uint64_t table_base; 1297 pfn_t pfnum; 1298 1299 if (msix_p == NULL) { 1300 msix_p = pci_msix_init(dip); 1301 if (msix_p != NULL) { 1302 i_ddi_set_msix(dip, msix_p); 1303 } else { 1304 cmn_err(CE_WARN, "apic_alloc_msix_vectors()" 1305 " msix_init failed"); 1306 return (0); 1307 } 1308 } 1309 /* 1310 * Hypervisor wants PCI config space address of msix table 1311 */ 1312 pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) & 1313 ~PFN_IS_FOREIGN_MFN; 1314 table_base = (uint64_t)((pfnum << PAGESHIFT) | 1315 ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET)); 1316 /* 1317 * get PCI bus # and devfn from reg spec for device 1318 */ 1319 get_busdevfn(dip, &busnum, &devfn); 1320 1321 /* 1322 * Tell xen about this pci device 1323 */ 1324 if (!xen_manage_device(busnum, devfn)) 1325 return (0); 1326 mutex_enter(&airq_mutex); 1327 1328 if ((rcount = apic_navail_vector(dip, pri)) > count) 1329 rcount = count; 1330 else if (rcount == 0 || (rcount < count && 1331 behavior == DDI_INTR_ALLOC_STRICT)) { 1332 rcount = 0; 1333 goto out; 1334 } 1335 1336 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1337 for (i = 0; i < rcount; i++) { 1338 int irqno; 1339 uchar_t vector; 1340 apic_irq_t *irqptr; 1341 1342 /* 1343 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq 1344 */ 1345 map_irq.domid = DOMID_SELF; 1346 map_irq.type = MAP_PIRQ_TYPE_MSI; 1347 map_irq.index = -1; /* hypervisor auto allocates vector */ 1348 map_irq.pirq = -1; 1349 map_irq.bus = busnum; 1350 map_irq.devfn = devfn; 1351 map_irq.entry_nr = i; 1352 map_irq.table_base = table_base; 1353 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1354 irqno = map_irq.pirq; 1355 if (rc < 0) { 1356 mutex_exit(&airq_mutex); 1357 cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1358 return (0); 1359 } 1360 if (irqno < 0) { 1361 mutex_exit(&airq_mutex); 1362 cmn_err(CE_NOTE, 1363 "!hypervisor not configured for MSI support"); 1364 xen_support_msi = -1; 1365 return (0); 1366 } 1367 /* 1368 * Find out what vector the hypervisor assigned 1369 */ 1370 vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i); 1371 if (msi_allocate_irq(irqno) < 0) { 1372 mutex_exit(&airq_mutex); 1373 return (0); 1374 } 1375 apic_vector_to_irq[vector] = (uchar_t)irqno; 1376 msi_vector_to_pirq[vector] = (uchar_t)irqno; 1377 apic_max_device_irq = max(irqno, apic_max_device_irq); 1378 apic_min_device_irq = min(irqno, apic_min_device_irq); 1379 irqptr = apic_irq_table[irqno]; 1380 ASSERT(irqptr != NULL); 1381 irqptr->airq_vector = (uchar_t)vector; 1382 irqptr->airq_ipl = pri; 1383 irqptr->airq_origirq = (uchar_t)(inum + i); 1384 irqptr->airq_share_id = 0; 1385 irqptr->airq_mps_intr_index = MSIX_INDEX; 1386 irqptr->airq_dip = dip; 1387 irqptr->airq_major = major; 1388 irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */ 1389 } 1390 out: 1391 mutex_exit(&airq_mutex); 1392 return (rcount); 1393 } 1394 1395 1396 /* 1397 * This finds the apic_irq_t associated with the dip, ispec and type. 1398 * The entry should have already been freed, but it can not have been 1399 * reused yet since the hypervisor can not have reassigned the pirq since 1400 * we have not freed that yet. 1401 */ 1402 static apic_irq_t * 1403 msi_find_irq(dev_info_t *dip, struct intrspec *ispec) 1404 { 1405 apic_irq_t *irqp; 1406 int i; 1407 1408 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 1409 if ((irqp = apic_irq_table[i]) == NULL) 1410 continue; 1411 if ((irqp->airq_dip == dip) && 1412 (irqp->airq_origirq == ispec->intrspec_vec) && 1413 (irqp->airq_ipl == ispec->intrspec_pri)) { 1414 return (irqp); 1415 } 1416 } 1417 return (NULL); 1418 } 1419 1420 void 1421 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type) 1422 { 1423 int i, rc; 1424 physdev_unmap_pirq_t unmap_pirq; 1425 apic_irq_t *irqptr; 1426 struct intrspec ispec; 1427 1428 DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x " 1429 "count: %x pri: %x type: %x\n", 1430 (void *)dip, inum, count, pri, type)); 1431 1432 /* for MSI/X only */ 1433 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) 1434 return; 1435 1436 for (i = 0; i < count; i++) { 1437 DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x " 1438 "pri=0x%x count=0x%x\n", inum, pri, count)); 1439 ispec.intrspec_vec = inum + i; 1440 ispec.intrspec_pri = pri; 1441 if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) { 1442 cmn_err(CE_WARN, 1443 "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x", 1444 ddi_get_name(dip), ddi_get_name_addr(dip), 1445 (void *)dip, inum + i, pri); 1446 continue; 1447 } 1448 /* 1449 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq 1450 */ 1451 unmap_pirq.domid = DOMID_SELF; 1452 unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector]; 1453 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq); 1454 if (rc < 0) { 1455 cmn_err(CE_WARN, "unmap pirq failed"); 1456 return; 1457 } 1458 irqptr->airq_mps_intr_index = FREE_INDEX; 1459 apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ; 1460 } 1461 } 1462 1463 /* 1464 * The hypervisor doesn't permit access to local apics directly 1465 */ 1466 /* ARGSUSED */ 1467 uint32_t * 1468 mapin_apic(uint32_t addr, size_t len, int flags) 1469 { 1470 /* 1471 * Return a pointer to a memory area to fake out the 1472 * probe code that wants to read apic registers. 1473 * The dummy values will end up being ignored by xen 1474 * later on when they are used anyway. 1475 */ 1476 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1477 return (xen_psm_dummy_apic); 1478 } 1479 1480 /* ARGSUSED */ 1481 uint32_t * 1482 mapin_ioapic(uint32_t addr, size_t len, int flags) 1483 { 1484 /* 1485 * Return non-null here to fake out configure code that calls this. 1486 * The i86xpv platform will not reference through the returned value.. 1487 */ 1488 return ((uint32_t *)0x1); 1489 } 1490 1491 /* ARGSUSED */ 1492 void 1493 mapout_apic(caddr_t addr, size_t len) 1494 { 1495 } 1496 1497 /* ARGSUSED */ 1498 void 1499 mapout_ioapic(caddr_t addr, size_t len) 1500 { 1501 } 1502 1503 uint32_t 1504 ioapic_read(int apic_ix, uint32_t reg) 1505 { 1506 physdev_apic_t apic; 1507 1508 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1509 apic.reg = reg; 1510 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1511 panic("read ioapic %d reg %d failed", apic_ix, reg); 1512 return (apic.value); 1513 } 1514 1515 void 1516 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1517 { 1518 physdev_apic_t apic; 1519 1520 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1521 apic.reg = reg; 1522 apic.value = value; 1523 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1524 panic("write ioapic %d reg %d failed", apic_ix, reg); 1525 } 1526 1527 /* 1528 * This function was added as part of x2APIC support in pcplusmp. 1529 */ 1530 void 1531 ioapic_write_eoi(int apic_ix, uint32_t value) 1532 { 1533 physdev_apic_t apic; 1534 1535 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1536 apic.reg = APIC_IO_EOI; 1537 apic.value = value; 1538 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1539 panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix); 1540 } 1541 1542 /* 1543 * This function was added as part of x2APIC support in pcplusmp to resolve 1544 * undefined symbol in xpv_psm. 1545 */ 1546 void 1547 x2apic_update_psm() 1548 { 1549 } 1550 1551 /* 1552 * This function was added as part of x2APIC support in pcplusmp to resolve 1553 * undefined symbol in xpv_psm. 1554 */ 1555 void 1556 apic_ret() 1557 { 1558 } 1559 1560 /* 1561 * Call rebind to do the actual programming. 1562 */ 1563 int 1564 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1565 { 1566 apic_irq_t *irqptr; 1567 struct ioapic_reprogram_data *drep = NULL; 1568 int rv, cpu; 1569 cpuset_t cpus; 1570 1571 if (deferred) { 1572 drep = (struct ioapic_reprogram_data *)p; 1573 ASSERT(drep != NULL); 1574 irqptr = drep->irqp; 1575 } else { 1576 irqptr = (apic_irq_t *)p; 1577 } 1578 ASSERT(irqptr != NULL); 1579 /* 1580 * Set cpu based on xen idea of online cpu's not apic tables. 1581 * Note that xen ignores/sets to it's own preferred value the 1582 * target cpu field when programming ioapic anyway. 1583 */ 1584 if (irqptr->airq_mps_intr_index == MSI_INDEX) 1585 cpu = irqptr->airq_cpu; /* MSI cpus are already set */ 1586 else { 1587 cpu = xen_psm_bind_intr(irq); 1588 irqptr->airq_cpu = cpu; 1589 } 1590 if (cpu == IRQ_UNBOUND) { 1591 CPUSET_ZERO(cpus); 1592 CPUSET_OR(cpus, xen_psm_cpus_online); 1593 } else { 1594 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1595 } 1596 rv = apic_rebind(irqptr, cpu, drep); 1597 if (rv) { 1598 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1599 cpu = 0; 1600 irqptr->airq_cpu = cpu; 1601 rv = apic_rebind(irqptr, cpu, drep); 1602 } 1603 /* 1604 * If rebind successful bind the irq to an event channel 1605 */ 1606 if (rv == 0) { 1607 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1608 CPUSET_FIND(cpus, cpu); 1609 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1610 } 1611 return (rv); 1612 } 1613 1614 /* 1615 * Allocate a new vector for the given irq 1616 */ 1617 /* ARGSUSED */ 1618 uchar_t 1619 apic_modify_vector(uchar_t vector, int irq) 1620 { 1621 return (apic_allocate_vector(0, irq, 0)); 1622 } 1623 1624 /* 1625 * The rest of the file is just generic psm module boilerplate 1626 */ 1627 1628 static struct psm_ops xen_psm_ops = { 1629 xen_psm_probe, /* psm_probe */ 1630 1631 xen_psm_softinit, /* psm_init */ 1632 xen_psm_picinit, /* psm_picinit */ 1633 xen_psm_intr_enter, /* psm_intr_enter */ 1634 xen_psm_intr_exit, /* psm_intr_exit */ 1635 xen_psm_setspl, /* psm_setspl */ 1636 xen_psm_addspl, /* psm_addspl */ 1637 xen_psm_delspl, /* psm_delspl */ 1638 xen_psm_disable_intr, /* psm_disable_intr */ 1639 xen_psm_enable_intr, /* psm_enable_intr */ 1640 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1641 (void (*)(int))NULL, /* psm_set_softintr */ 1642 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1643 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1644 1645 xen_psm_clkinit, /* psm_clkinit */ 1646 xen_psm_get_clockirq, /* psm_get_clockirq */ 1647 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1648 xpv_gethrtime, /* psm_gethrtime */ 1649 1650 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1651 xen_psm_cpu_start, /* psm_cpu_start */ 1652 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1653 xen_psm_shutdown, /* psm_shutdown */ 1654 xen_psm_get_ipivect, /* psm_get_ipivect */ 1655 xen_psm_send_ipi, /* psm_send_ipi */ 1656 1657 xen_psm_translate_irq, /* psm_translate_irq */ 1658 1659 (void (*)(int, char *))NULL, /* psm_notify_error */ 1660 (void (*)(int msg))NULL, /* psm_notify_func */ 1661 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1662 xen_psm_timer_enable, /* psm_timer_enable */ 1663 xen_psm_timer_disable, /* psm_timer_disable */ 1664 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1665 (void (*)(int, int))NULL, /* psm_preshutdown */ 1666 xen_intr_ops, /* Advanced DDI Interrupt framework */ 1667 (int (*)(psm_state_request_t *))NULL /* psm_state */ 1668 }; 1669 1670 static struct psm_info xen_psm_info = { 1671 PSM_INFO_VER01_5, /* version */ 1672 PSM_OWN_EXCLUSIVE, /* ownership */ 1673 &xen_psm_ops, /* operation */ 1674 "xVM_psm", /* machine name */ 1675 "platform module" /* machine descriptions */ 1676 }; 1677 1678 static void *xen_psm_hdlp; 1679 1680 int 1681 _init(void) 1682 { 1683 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1684 } 1685 1686 int 1687 _fini(void) 1688 { 1689 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1690 } 1691 1692 int 1693 _info(struct modinfo *modinfop) 1694 { 1695 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1696 } 1697