1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2018 Joyent, Inc. 26 */ 27 28 #define PSMI_1_7 29 30 #include <sys/mutex.h> 31 #include <sys/types.h> 32 #include <sys/time.h> 33 #include <sys/clock.h> 34 #include <sys/machlock.h> 35 #include <sys/smp_impldefs.h> 36 #include <sys/uadmin.h> 37 #include <sys/promif.h> 38 #include <sys/psm.h> 39 #include <sys/psm_common.h> 40 #include <sys/atomic.h> 41 #include <sys/apic.h> 42 #include <sys/archsystm.h> 43 #include <sys/mach_intr.h> 44 #include <sys/hypervisor.h> 45 #include <sys/evtchn_impl.h> 46 #include <sys/modctl.h> 47 #include <sys/trap.h> 48 #include <sys/panic.h> 49 #include <sys/sysmacros.h> 50 #include <sys/pci_intr_lib.h> 51 #include <vm/hat_i86.h> 52 53 #include <xen/public/vcpu.h> 54 #include <xen/public/physdev.h> 55 56 57 /* 58 * Global Data 59 */ 60 61 int xen_psm_verbose = 0; 62 63 /* As of now we don't support x2apic in xVM */ 64 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 65 int apic_error = 0; 66 int apic_verbose = 0; 67 cpuset_t apic_cpumask; 68 int apic_forceload = 0; 69 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 70 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 71 }; 72 uchar_t apic_ipltopri[MAXIPL + 1]; 73 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 74 uint_t apic_picinit_called; 75 apic_cpus_info_t *apic_cpus; 76 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 77 /* use to make sure only one cpu handles the nmi */ 78 static lock_t xen_psm_nmi_lock; 79 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 80 int xen_psm_panic_on_nmi = 0; 81 int xen_psm_num_nmis = 0; 82 83 cpuset_t xen_psm_cpus_online; /* online cpus */ 84 int xen_psm_ncpus = 1; /* cpu count */ 85 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 86 87 int xen_support_msi = 0; 88 89 static int xen_clock_irq = INVALID_IRQ; 90 91 /* flag definitions for xen_psm_verbose */ 92 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 93 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 94 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 95 96 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 97 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 98 cmn_err fmt; 99 100 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 101 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 102 prom_printf fmt; 103 104 /* 105 * Dummy apic array to point common routines at that want to do some apic 106 * manipulation. Xen doesn't allow guest apic access so we point at these 107 * memory locations to fake out those who want to do apic fiddling. 108 */ 109 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 110 111 static struct psm_info xen_psm_info; 112 static void xen_psm_setspl(int); 113 114 int 115 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 116 int behavior); 117 int 118 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 119 int behavior); 120 121 /* 122 * Local support routines 123 */ 124 125 /* 126 * Select vcpu to bind xen virtual device interrupt to. 127 */ 128 /*ARGSUSED*/ 129 int 130 xen_psm_bind_intr(int irq) 131 { 132 int bind_cpu; 133 apic_irq_t *irqptr; 134 135 bind_cpu = IRQ_UNBOUND; 136 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 137 return (bind_cpu); 138 if (irq <= APIC_MAX_VECTOR) 139 irqptr = apic_irq_table[irq]; 140 else 141 irqptr = NULL; 142 if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND)) 143 bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND; 144 if (bind_cpu != IRQ_UNBOUND) { 145 if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)) 146 bind_cpu = 0; 147 goto done; 148 } 149 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 150 do { 151 bind_cpu = xen_psm_next_bind_cpu++; 152 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 153 xen_psm_next_bind_cpu = 0; 154 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 155 } else { 156 bind_cpu = 0; 157 } 158 done: 159 return (bind_cpu); 160 } 161 162 /* 163 * Autoconfiguration Routines 164 */ 165 166 static int 167 xen_psm_probe(void) 168 { 169 int ret = PSM_SUCCESS; 170 171 if (DOMAIN_IS_INITDOMAIN(xen_info)) 172 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 173 return (ret); 174 } 175 176 static void 177 xen_psm_softinit(void) 178 { 179 /* LINTED logical expression always true: op "||" */ 180 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 181 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 182 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 183 apic_init_common(); 184 } 185 } 186 187 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 188 189 /*ARGSUSED*/ 190 static int 191 xen_psm_clkinit(int hertz) 192 { 193 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 194 extern int dosynctodr; 195 196 /* 197 * domU cannot set the TOD hardware, fault the TOD clock now to 198 * indicate that and turn off attempts to sync TOD hardware 199 * with the hires timer. 200 */ 201 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 202 mutex_enter(&tod_lock); 203 (void) tod_fault(TOD_RDONLY, 0); 204 dosynctodr = 0; 205 mutex_exit(&tod_lock); 206 } 207 /* 208 * The hypervisor provides a timer based on the local APIC timer. 209 * The interface supports requests of nanosecond resolution. 210 * A common frequency of the apic clock is 100 Mhz which 211 * gives a resolution of 10 nsec per tick. What we would really like 212 * is a way to get the ns per tick value from xen. 213 * XXPV - This is an assumption that needs checking and may change 214 */ 215 return (XEN_NSEC_PER_TICK); 216 } 217 218 static void 219 xen_psm_hrtimeinit(void) 220 { 221 extern int gethrtime_hires; 222 gethrtime_hires = 1; 223 } 224 225 /* xen_psm NMI handler */ 226 static uint_t 227 xen_psm_nmi_intr(caddr_t arg __unused, caddr_t arg1 __unused) 228 { 229 xen_psm_num_nmis++; 230 231 if (!lock_try(&xen_psm_nmi_lock)) 232 return (DDI_INTR_UNCLAIMED); 233 234 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 235 debug_enter("NMI received: entering kmdb\n"); 236 } else if (xen_psm_panic_on_nmi) { 237 /* Keep panic from entering kmdb. */ 238 nopanicdebug = 1; 239 panic("NMI received\n"); 240 } else { 241 /* 242 * prom_printf is the best shot we have of something which is 243 * problem free from high level/NMI type of interrupts 244 */ 245 prom_printf("NMI received\n"); 246 } 247 248 lock_clear(&xen_psm_nmi_lock); 249 return (DDI_INTR_CLAIMED); 250 } 251 252 static void 253 xen_psm_picinit() 254 { 255 int cpu, irqno; 256 cpuset_t cpus; 257 258 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 259 /* set a flag so we know we have run xen_psm_picinit() */ 260 apic_picinit_called = 1; 261 LOCK_INIT_CLEAR(&apic_ioapic_lock); 262 263 /* XXPV - do we need to do this? */ 264 picsetup(); /* initialise the 8259 */ 265 266 /* enable apic mode if imcr present */ 267 /* XXPV - do we need to do this either? */ 268 if (apic_imcrp) { 269 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 270 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 271 } 272 273 ioapic_init_intr(IOAPIC_NOMASK); 274 /* 275 * We never called xen_psm_addspl() when the SCI 276 * interrupt was added because that happened before the 277 * PSM module was loaded. Fix that up here by doing 278 * any missed operations (e.g. bind to CPU) 279 */ 280 if ((irqno = apic_sci_vect) > 0) { 281 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 282 CPUSET_ZERO(cpus); 283 CPUSET_OR(cpus, xen_psm_cpus_online); 284 } else { 285 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 286 } 287 ec_set_irq_affinity(irqno, cpus); 288 apic_irq_table[irqno]->airq_temp_cpu = 289 (uchar_t)(cpu & ~IRQ_USER_BOUND); 290 ec_enable_irq(irqno); 291 } 292 } 293 294 /* add nmi handler - least priority nmi handler */ 295 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 296 297 if (!psm_add_nmintr(0, xen_psm_nmi_intr, 298 "xVM_psm NMI handler", (caddr_t)NULL)) 299 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 300 } 301 302 303 /* 304 * generates an interprocessor interrupt to another CPU 305 */ 306 static void 307 xen_psm_send_ipi(int cpun, int ipl) 308 { 309 ulong_t flag = intr_clear(); 310 311 ec_send_ipi(ipl, cpun); 312 intr_restore(flag); 313 } 314 315 /*ARGSUSED*/ 316 static int 317 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 318 { 319 int cpu, ret; 320 cpuset_t cpus; 321 322 /* 323 * We are called at splhi() so we can't call anything that might end 324 * up trying to context switch. 325 */ 326 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 327 DOMAIN_IS_INITDOMAIN(xen_info)) { 328 /* 329 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 330 */ 331 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 332 } else { 333 /* 334 * Set priority/affinity/enable for non PIRQs 335 */ 336 ret = ec_set_irq_priority(irqno, ipl); 337 ASSERT(ret == 0); 338 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 339 CPUSET_ZERO(cpus); 340 CPUSET_OR(cpus, xen_psm_cpus_online); 341 } else { 342 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 343 } 344 ec_set_irq_affinity(irqno, cpus); 345 ec_enable_irq(irqno); 346 } 347 return (ret); 348 } 349 350 /* 351 * Acquire ownership of this irq on this cpu 352 */ 353 void 354 xen_psm_acquire_irq(int irq) 355 { 356 ulong_t flags; 357 int cpuid; 358 359 /* 360 * If the irq is currently being serviced by another cpu 361 * we busy-wait for the other cpu to finish. Take any 362 * pending interrupts before retrying. 363 */ 364 do { 365 flags = intr_clear(); 366 cpuid = ec_block_irq(irq); 367 intr_restore(flags); 368 } while (cpuid != CPU->cpu_id); 369 } 370 371 /*ARGSUSED*/ 372 static int 373 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 374 { 375 apic_irq_t *irqptr; 376 int err = PSM_SUCCESS; 377 378 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 379 DOMAIN_IS_INITDOMAIN(xen_info)) { 380 irqptr = apic_irq_table[irqno]; 381 /* 382 * unbind if no more sharers of this irq/evtchn 383 */ 384 if (irqptr->airq_share == 1) { 385 xen_psm_acquire_irq(irqno); 386 ec_unbind_irq(irqno); 387 } 388 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 389 /* 390 * If still in use reset priority 391 */ 392 if (!err && irqptr->airq_share != 0) { 393 err = ec_set_irq_priority(irqno, max_ipl); 394 return (err); 395 } 396 } else { 397 xen_psm_acquire_irq(irqno); 398 ec_unbind_irq(irqno); 399 } 400 return (err); 401 } 402 403 static processorid_t 404 xen_psm_get_next_processorid(processorid_t id) 405 { 406 if (id == -1) 407 return (0); 408 409 for (id++; id < NCPU; id++) { 410 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 411 case 0: /* yeah, that one's there */ 412 return (id); 413 default: 414 case X_EINVAL: /* out of range */ 415 return (-1); 416 case X_ENOENT: /* not present in the domain */ 417 /* 418 * It's not clear that we -need- to keep looking 419 * at this point, if, e.g., we can guarantee 420 * the hypervisor always keeps a contiguous range 421 * of vcpus around this is equivalent to "out of range". 422 * 423 * But it would be sad to miss a vcpu we're 424 * supposed to be using .. 425 */ 426 break; 427 } 428 } 429 430 return (-1); 431 } 432 433 /* 434 * XXPV - undo the start cpu op change; return to ignoring this value 435 * - also tweak error handling in main startup loop 436 */ 437 /*ARGSUSED*/ 438 static int 439 xen_psm_cpu_start(processorid_t id, caddr_t arg) 440 { 441 int ret; 442 443 ASSERT(id > 0); 444 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 445 ec_bind_cpu_ipis(id); 446 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 447 if ((ret = xen_vcpu_up(id)) == 0) 448 xen_psm_ncpus++; 449 else 450 ret = EINVAL; 451 return (ret); 452 } 453 454 /* 455 * Allocate an irq for inter cpu signaling 456 */ 457 /*ARGSUSED*/ 458 static int 459 xen_psm_get_ipivect(int ipl, int type) 460 { 461 return (ec_bind_ipi_to_irq(ipl, 0)); 462 } 463 464 /*ARGSUSED*/ 465 static int 466 xen_psm_get_clockirq(int ipl) 467 { 468 if (xen_clock_irq != INVALID_IRQ) 469 return (xen_clock_irq); 470 471 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 472 return (xen_clock_irq); 473 } 474 475 /*ARGSUSED*/ 476 static void 477 xen_psm_shutdown(int cmd, int fcn) 478 { 479 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 480 481 switch (cmd) { 482 case A_SHUTDOWN: 483 switch (fcn) { 484 case AD_BOOT: 485 case AD_IBOOT: 486 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 487 break; 488 case AD_POWEROFF: 489 /* fall through if domU or if poweroff fails */ 490 if (DOMAIN_IS_INITDOMAIN(xen_info)) 491 if (apic_enable_acpi) 492 (void) acpi_poweroff(); 493 /* FALLTHRU */ 494 case AD_HALT: 495 default: 496 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 497 break; 498 } 499 break; 500 case A_REBOOT: 501 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 502 break; 503 default: 504 return; 505 } 506 } 507 508 509 static int 510 xen_psm_translate_irq(dev_info_t *dip, int irqno) 511 { 512 if (dip == NULL) { 513 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 514 " dip = NULL\n", irqno)); 515 return (irqno); 516 } 517 return (irqno); 518 } 519 520 /* 521 * xen_psm_intr_enter() acks the event that triggered the interrupt and 522 * returns the new priority level, 523 */ 524 /*ARGSUSED*/ 525 static int 526 xen_psm_intr_enter(int ipl, int *vector) 527 { 528 int newipl; 529 uint_t intno; 530 cpu_t *cpu = CPU; 531 532 intno = (*vector); 533 534 ASSERT(intno < NR_IRQS); 535 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 536 537 if (!ec_is_edge_pirq(intno)) 538 ec_clear_irq(intno); 539 540 newipl = autovect[intno].avh_hi_pri; 541 if (newipl == 0) { 542 /* 543 * (newipl == 0) means we have no service routines for this 544 * vector. We will treat this as a spurious interrupt. 545 * We have cleared the pending bit already, clear the event 546 * mask and return a spurious interrupt. This case can happen 547 * when an interrupt delivery is racing with the removal of 548 * of the service routine for that interrupt. 549 */ 550 ec_unmask_irq(intno); 551 newipl = -1; /* flag spurious interrupt */ 552 } else if (newipl <= cpu->cpu_pri) { 553 /* 554 * (newipl <= cpu->cpu_pri) means that we must be trying to 555 * service a vector that was shared with a higher priority 556 * isr. The higher priority handler has been removed and 557 * we need to service this int. We can't return a lower 558 * priority than current cpu priority. Just synthesize a 559 * priority to return that should be acceptable. 560 * It should never happen that we synthesize a priority that 561 * moves us from low-priority to high-priority that would make 562 * a us incorrectly run on the high priority stack. 563 */ 564 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 565 ASSERT(newipl != LOCK_LEVEL + 1); 566 } 567 return (newipl); 568 } 569 570 571 /* 572 * xen_psm_intr_exit() restores the old interrupt 573 * priority level after processing an interrupt. 574 * It is called with interrupts disabled, and does not enable interrupts. 575 */ 576 /* ARGSUSED */ 577 static void 578 xen_psm_intr_exit(int ipl, int vector) 579 { 580 ec_try_unmask_irq(vector); 581 xen_psm_setspl(ipl); 582 } 583 584 intr_exit_fn_t 585 psm_intr_exit_fn(void) 586 { 587 return (xen_psm_intr_exit); 588 } 589 590 /* 591 * Check if new ipl level allows delivery of previously unserviced events 592 */ 593 static void 594 xen_psm_setspl(int ipl) 595 { 596 struct cpu *cpu = CPU; 597 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 598 uint16_t pending; 599 600 ASSERT(vci->evtchn_upcall_mask != 0); 601 602 /* 603 * If new ipl level will enable any pending interrupts, setup so the 604 * upcoming sti will cause us to get an upcall. 605 */ 606 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 607 if (pending) { 608 int i; 609 ulong_t pending_sels = 0; 610 volatile ulong_t *selp; 611 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 612 613 for (i = bsrw_insn(pending); i > ipl; i--) 614 pending_sels |= cpe->pending_sel[i]; 615 ASSERT(pending_sels); 616 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 617 atomic_or_ulong(selp, pending_sels); 618 vci->evtchn_upcall_pending = 1; 619 } 620 } 621 622 /* 623 * This function provides external interface to the nexus for all 624 * functionality related to the new DDI interrupt framework. 625 * 626 * Input: 627 * dip - pointer to the dev_info structure of the requested device 628 * hdlp - pointer to the internal interrupt handle structure for the 629 * requested interrupt 630 * intr_op - opcode for this call 631 * result - pointer to the integer that will hold the result to be 632 * passed back if return value is PSM_SUCCESS 633 * 634 * Output: 635 * return value is either PSM_SUCCESS or PSM_FAILURE 636 */ 637 int 638 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 639 psm_intr_op_t intr_op, int *result) 640 { 641 int cap; 642 int err; 643 int new_priority; 644 apic_irq_t *irqp; 645 struct intrspec *ispec; 646 647 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 648 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 649 650 switch (intr_op) { 651 case PSM_INTR_OP_CHECK_MSI: 652 /* 653 * Till PCI passthru is supported, only dom0 has MSI/MSIX 654 */ 655 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 656 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 657 DDI_INTR_TYPE_MSIX); 658 break; 659 } 660 /* 661 * Check MSI/X is supported or not at APIC level and 662 * masked off the MSI/X bits in hdlp->ih_type if not 663 * supported before return. If MSI/X is supported, 664 * leave the ih_type unchanged and return. 665 * 666 * hdlp->ih_type passed in from the nexus has all the 667 * interrupt types supported by the device. 668 */ 669 if (xen_support_msi == 0) { 670 /* 671 * if xen_support_msi is not set, call 672 * apic_check_msi_support() to check whether msi 673 * is supported first 674 */ 675 if (apic_check_msi_support() == PSM_SUCCESS) 676 xen_support_msi = 1; 677 else 678 xen_support_msi = -1; 679 } 680 if (xen_support_msi == 1) 681 *result = hdlp->ih_type; 682 else 683 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 684 DDI_INTR_TYPE_MSIX); 685 break; 686 case PSM_INTR_OP_ALLOC_VECTORS: 687 if (hdlp->ih_type == DDI_INTR_TYPE_MSI) 688 *result = apic_alloc_msi_vectors(dip, hdlp->ih_inum, 689 hdlp->ih_scratch1, hdlp->ih_pri, 690 (int)(uintptr_t)hdlp->ih_scratch2); 691 else 692 *result = apic_alloc_msix_vectors(dip, hdlp->ih_inum, 693 hdlp->ih_scratch1, hdlp->ih_pri, 694 (int)(uintptr_t)hdlp->ih_scratch2); 695 break; 696 case PSM_INTR_OP_FREE_VECTORS: 697 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 698 hdlp->ih_pri, hdlp->ih_type); 699 break; 700 case PSM_INTR_OP_NAVAIL_VECTORS: 701 /* 702 * XXPV - maybe we should make this be: 703 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 704 */ 705 if (DOMAIN_IS_INITDOMAIN(xen_info)) 706 *result = APIC_VECTOR_PER_IPL; 707 else 708 *result = 1; 709 break; 710 case PSM_INTR_OP_XLATE_VECTOR: 711 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 712 if (ispec->intrspec_vec >= PIRQ_BASE && 713 ispec->intrspec_vec < NR_PIRQS && 714 DOMAIN_IS_INITDOMAIN(xen_info)) { 715 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 716 } else { 717 *result = ispec->intrspec_vec; 718 } 719 break; 720 case PSM_INTR_OP_GET_PENDING: 721 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 722 *result = ec_pending_irq(hdlp->ih_vector); 723 break; 724 case PSM_INTR_OP_CLEAR_MASK: 725 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 726 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 727 return (PSM_FAILURE); 728 ec_enable_irq(hdlp->ih_vector); 729 break; 730 case PSM_INTR_OP_SET_MASK: 731 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 732 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 733 return (PSM_FAILURE); 734 ec_disable_irq(hdlp->ih_vector); 735 break; 736 case PSM_INTR_OP_GET_CAP: 737 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 738 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 739 cap |= DDI_INTR_FLAG_MASKABLE; 740 *result = cap; 741 break; 742 case PSM_INTR_OP_GET_SHARED: 743 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 744 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 745 return (PSM_FAILURE); 746 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 747 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 748 == NULL) 749 return (PSM_FAILURE); 750 *result = (irqp->airq_share > 1) ? 1: 0; 751 } else { 752 return (PSM_FAILURE); 753 } 754 break; 755 case PSM_INTR_OP_SET_PRI: 756 new_priority = *(int *)result; 757 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 758 if (err != 0) 759 return (PSM_FAILURE); 760 break; 761 case PSM_INTR_OP_GET_INTR: 762 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 763 return (PSM_FAILURE); 764 /* 765 * The interrupt handle given here has been allocated 766 * specifically for this command, and ih_private carries 767 * a pointer to a apic_get_intr_t. 768 */ 769 if (apic_get_vector_intr_info( 770 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 771 return (PSM_FAILURE); 772 break; 773 case PSM_INTR_OP_SET_CAP: 774 /* FALLTHRU */ 775 default: 776 return (PSM_FAILURE); 777 } 778 return (PSM_SUCCESS); 779 } 780 781 static void 782 xen_psm_rebind_irq(int irq) 783 { 784 cpuset_t ncpu; 785 processorid_t newcpu; 786 apic_irq_t *irqptr; 787 788 newcpu = xen_psm_bind_intr(irq); 789 if (newcpu == IRQ_UNBOUND) { 790 CPUSET_ZERO(ncpu); 791 CPUSET_OR(ncpu, xen_psm_cpus_online); 792 } else { 793 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 794 } 795 ec_set_irq_affinity(irq, ncpu); 796 if (irq <= APIC_MAX_VECTOR) { 797 irqptr = apic_irq_table[irq]; 798 ASSERT(irqptr != NULL); 799 irqptr->airq_temp_cpu = (uchar_t)newcpu; 800 } 801 } 802 803 /* 804 * Disable all device interrupts for the given cpu. 805 * High priority interrupts are not disabled and will still be serviced. 806 */ 807 static int 808 xen_psm_disable_intr(processorid_t cpun) 809 { 810 int irq; 811 812 /* 813 * Can't offline VCPU 0 on this hypervisor. There's no reason 814 * anyone would want to given that the CPUs are virtual. Also note 815 * that the hypervisor requires suspend/resume to be on VCPU 0. 816 */ 817 if (cpun == 0) 818 return (PSM_FAILURE); 819 820 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 821 for (irq = 0; irq < NR_IRQS; irq++) { 822 if (!ec_irq_needs_rebind(irq, cpun)) 823 continue; 824 xen_psm_rebind_irq(irq); 825 } 826 return (PSM_SUCCESS); 827 } 828 829 static void 830 xen_psm_enable_intr(processorid_t cpun) 831 { 832 int irq; 833 834 if (cpun == 0) 835 return; 836 837 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 838 839 /* 840 * Rebalance device interrupts among online processors 841 */ 842 for (irq = 0; irq < NR_IRQS; irq++) { 843 if (!ec_irq_rebindable(irq)) 844 continue; 845 xen_psm_rebind_irq(irq); 846 } 847 848 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 849 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 850 } 851 } 852 853 static int 854 xen_psm_post_cpu_start() 855 { 856 processorid_t cpun; 857 858 cpun = psm_get_cpu_id(); 859 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 860 /* 861 * Non-virtualized environments can call psm_post_cpu_start 862 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. 863 * xen_psm_post_cpu_start() is only called from boot. 864 */ 865 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; 866 } 867 return (PSM_SUCCESS); 868 } 869 870 /* 871 * This function will reprogram the timer. 872 * 873 * When in oneshot mode the argument is the absolute time in future at which to 874 * generate the interrupt. 875 * 876 * When in periodic mode, the argument is the interval at which the 877 * interrupts should be generated. There is no need to support the periodic 878 * mode timer change at this time. 879 * 880 * Note that we must be careful to convert from hrtime to Xen system time (see 881 * xpv_timestamp.c). 882 */ 883 static void 884 xen_psm_timer_reprogram(hrtime_t timer_req) 885 { 886 hrtime_t now, timer_new, time_delta, xen_time; 887 ulong_t flags; 888 889 flags = intr_clear(); 890 /* 891 * We should be called from high PIL context (CBE_HIGH_PIL), 892 * so kpreempt is disabled. 893 */ 894 895 now = xpv_gethrtime(); 896 xen_time = xpv_getsystime(); 897 if (timer_req <= now) { 898 /* 899 * requested to generate an interrupt in the past 900 * generate an interrupt as soon as possible 901 */ 902 time_delta = XEN_NSEC_PER_TICK; 903 } else 904 time_delta = timer_req - now; 905 906 timer_new = xen_time + time_delta; 907 if (HYPERVISOR_set_timer_op(timer_new) != 0) 908 panic("can't set hypervisor timer?"); 909 intr_restore(flags); 910 } 911 912 /* 913 * This function will enable timer interrupts. 914 */ 915 static void 916 xen_psm_timer_enable(void) 917 { 918 ec_unmask_irq(xen_clock_irq); 919 } 920 921 /* 922 * This function will disable timer interrupts on the current cpu. 923 */ 924 static void 925 xen_psm_timer_disable(void) 926 { 927 (void) ec_block_irq(xen_clock_irq); 928 /* 929 * If the clock irq is pending on this cpu then we need to 930 * clear the pending interrupt. 931 */ 932 ec_unpend_irq(xen_clock_irq); 933 } 934 935 /* 936 * 937 * The following functions are in the platform specific file so that they 938 * can be different functions depending on whether we are running on 939 * bare metal or a hypervisor. 940 */ 941 942 /* 943 * Allocate a free vector for irq at ipl. 944 */ 945 /* ARGSUSED */ 946 uchar_t 947 apic_allocate_vector(int ipl, int irq, int pri) 948 { 949 physdev_irq_t irq_op; 950 uchar_t vector; 951 int rc; 952 953 irq_op.irq = irq; 954 955 if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 956 != 0) 957 panic("Hypervisor alloc vector failed err: %d", -rc); 958 vector = irq_op.vector; 959 /* 960 * No need to worry about vector colliding with our reserved vectors 961 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 962 * generated traps and handle them properly. 963 */ 964 apic_vector_to_irq[vector] = (uchar_t)irq; 965 return (vector); 966 } 967 968 /* Mark vector as not being used by any irq */ 969 void 970 apic_free_vector(uchar_t vector) 971 { 972 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 973 } 974 975 /* 976 * This function returns the no. of vectors available for the pri. 977 * dip is not used at this moment. If we really don't need that, 978 * it will be removed. Since priority is not limited by hardware 979 * when running on the hypervisor we simply return the maximum no. 980 * of available contiguous vectors. 981 */ 982 /*ARGSUSED*/ 983 int 984 apic_navail_vector(dev_info_t *dip, int pri) 985 { 986 int lowest, highest, i, navail, count; 987 988 DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n", 989 (void *)dip, pri)); 990 991 highest = APIC_MAX_VECTOR; 992 lowest = APIC_BASE_VECT; 993 navail = count = 0; 994 995 /* It has to be contiguous */ 996 for (i = lowest; i < highest; i++) { 997 count = 0; 998 while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) && 999 (i < highest)) { 1000 count++; 1001 i++; 1002 } 1003 if (count > navail) 1004 navail = count; 1005 } 1006 return (navail); 1007 } 1008 1009 static physdev_manage_pci_t *managed_devlist; 1010 static int mdev_cnt; 1011 static int mdev_size = 128; 1012 static uchar_t msi_vector_to_pirq[APIC_MAX_VECTOR+1]; 1013 1014 /* 1015 * Add devfn on given bus to devices managed by hypervisor 1016 */ 1017 static int 1018 xen_manage_device(uint8_t bus, uint8_t devfn) 1019 { 1020 physdev_manage_pci_t manage_pci, *newlist; 1021 int rc, i, oldsize; 1022 1023 /* 1024 * Check if bus/devfn already managed. If so just return success. 1025 */ 1026 if (managed_devlist == NULL) { 1027 managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) * 1028 mdev_size, KM_NOSLEEP); 1029 if (managed_devlist == NULL) { 1030 cmn_err(CE_WARN, 1031 "Can't alloc space for managed device list"); 1032 return (0); 1033 } 1034 }; 1035 for (i = 0; i < mdev_cnt; i++) { 1036 if (managed_devlist[i].bus == bus && 1037 managed_devlist[i].devfn == devfn) 1038 return (1); /* device already managed */ 1039 } 1040 manage_pci.bus = bus; 1041 manage_pci.devfn = devfn; 1042 rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci); 1043 if (rc < 0) { 1044 cmn_err(CE_WARN, 1045 "hypervisor add pci device call failed bus:0x%x" 1046 " devfn:0x%x", bus, devfn); 1047 return (0); 1048 } 1049 /* 1050 * Add device to the managed device list 1051 */ 1052 if (i == mdev_size) { 1053 /* 1054 * grow the managed device list 1055 */ 1056 oldsize = mdev_size * sizeof (physdev_manage_pci_t); 1057 mdev_size *= 2; 1058 newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size, 1059 KM_NOSLEEP); 1060 if (newlist == NULL) { 1061 cmn_err(CE_WARN, "Can't grow managed device list"); 1062 return (0); 1063 } 1064 bcopy(managed_devlist, newlist, oldsize); 1065 kmem_free(managed_devlist, oldsize); 1066 managed_devlist = newlist; 1067 } 1068 managed_devlist[i].bus = bus; 1069 managed_devlist[i].devfn = devfn; 1070 mdev_cnt++; 1071 return (1); 1072 } 1073 1074 /* 1075 * allocate an apic irq struct for an MSI interrupt 1076 */ 1077 static int 1078 msi_allocate_irq(int irq) 1079 { 1080 apic_irq_t *irqptr = apic_irq_table[irq]; 1081 1082 if (irqptr == NULL) { 1083 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1084 if (irqptr == NULL) { 1085 cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ"); 1086 return (-1); 1087 } 1088 apic_irq_table[irq] = irqptr; 1089 } else { 1090 if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0) 1091 irqptr->airq_mps_intr_index = FREE_INDEX; 1092 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 1093 cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use"); 1094 return (-1); 1095 } 1096 } 1097 irqptr->airq_mps_intr_index = FREE_INDEX; 1098 return (irq); 1099 } 1100 1101 /* 1102 * read MSI/MSIX vector out of config space 1103 */ 1104 static uchar_t 1105 xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry) 1106 { 1107 uint64_t msi_data = 0; 1108 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip); 1109 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip); 1110 ushort_t msi_ctrl; 1111 uchar_t vector; 1112 1113 ASSERT((handle != NULL) && (cap_ptr != 0)); 1114 if (type == DDI_INTR_TYPE_MSI) { 1115 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1116 /* 1117 * Get vector 1118 */ 1119 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1120 msi_data = pci_config_get16(handle, 1121 cap_ptr + PCI_MSI_64BIT_DATA); 1122 } else { 1123 msi_data = pci_config_get16(handle, 1124 cap_ptr + PCI_MSI_32BIT_DATA); 1125 } 1126 vector = (msi_data & 0xff) + entry; 1127 } else if (type == DDI_INTR_TYPE_MSIX) { 1128 uintptr_t off; 1129 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1130 1131 /* Offset into the given entry in the MSI-X table */ 1132 off = (uintptr_t)msix_p->msix_tbl_addr + 1133 (entry * PCI_MSIX_VECTOR_SIZE); 1134 1135 msi_data = ddi_get32(msix_p->msix_tbl_hdl, 1136 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET)); 1137 vector = msi_data & 0xff; 1138 } 1139 return (vector); 1140 } 1141 1142 1143 static void 1144 get_busdevfn(dev_info_t *dip, int *busp, int *devfnp) 1145 { 1146 pci_regspec_t *regspec; 1147 int reglen; 1148 1149 /* 1150 * Get device reg spec, first word has PCI bus and 1151 * device/function info we need. 1152 */ 1153 if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg", 1154 (caddr_t)®spec, ®len) != DDI_SUCCESS) { 1155 cmn_err(CE_WARN, 1156 "get_busdevfn() failed to get regspec."); 1157 return; 1158 } 1159 /* 1160 * get PCI bus # from reg spec for device 1161 */ 1162 *busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi); 1163 /* 1164 * get combined device/function from reg spec for device. 1165 */ 1166 *devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >> 1167 PCI_REG_FUNC_SHIFT; 1168 1169 kmem_free(regspec, reglen); 1170 } 1171 1172 /* 1173 * This function allocates "count" MSI vector(s) for the given "dip/pri/type" 1174 */ 1175 int 1176 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri, 1177 int behavior) 1178 { 1179 int rcount, i, rc, irqno; 1180 uchar_t vector, cpu; 1181 major_t major; 1182 apic_irq_t *irqptr; 1183 physdev_map_pirq_t map_irq; 1184 int busnum, devfn; 1185 1186 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p " 1187 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 1188 (void *)dip, inum, pri, count, behavior)); 1189 1190 if (count > 1) { 1191 if (behavior == DDI_INTR_ALLOC_STRICT && 1192 apic_multi_msi_enable == 0) 1193 return (0); 1194 if (apic_multi_msi_enable == 0) 1195 count = 1; 1196 } 1197 1198 if ((rcount = apic_navail_vector(dip, pri)) > count) 1199 rcount = count; 1200 else if (rcount == 0 || (rcount < count && 1201 behavior == DDI_INTR_ALLOC_STRICT)) 1202 return (0); 1203 1204 /* if not ISP2, then round it down */ 1205 if (!ISP2(rcount)) 1206 rcount = 1 << (highbit(rcount) - 1); 1207 1208 /* 1209 * get PCI bus # and devfn from reg spec for device 1210 */ 1211 get_busdevfn(dip, &busnum, &devfn); 1212 1213 /* 1214 * Tell xen about this pci device 1215 */ 1216 if (!xen_manage_device(busnum, devfn)) 1217 return (0); 1218 1219 mutex_enter(&airq_mutex); 1220 1221 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1222 for (i = 0; i < rcount; i++) { 1223 /* 1224 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq 1225 */ 1226 map_irq.domid = DOMID_SELF; 1227 map_irq.type = MAP_PIRQ_TYPE_MSI; 1228 map_irq.index = -rcount; /* hypervisor auto allocates vectors */ 1229 map_irq.pirq = -1; 1230 map_irq.bus = busnum; 1231 map_irq.devfn = devfn; 1232 map_irq.entry_nr = i; 1233 map_irq.table_base = 0; 1234 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1235 irqno = map_irq.pirq; 1236 if (rc < 0) { 1237 mutex_exit(&airq_mutex); 1238 cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1239 return (i); 1240 } 1241 if (irqno < 0) { 1242 mutex_exit(&airq_mutex); 1243 cmn_err(CE_NOTE, 1244 "!hypervisor not configured for MSI support"); 1245 xen_support_msi = -1; 1246 return (0); 1247 } 1248 1249 /* 1250 * Find out what vector the hypervisor assigned 1251 */ 1252 vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, i); 1253 1254 if (msi_allocate_irq(irqno) < 0) { 1255 mutex_exit(&airq_mutex); 1256 return (i); 1257 } 1258 apic_max_device_irq = max(irqno, apic_max_device_irq); 1259 apic_min_device_irq = min(irqno, apic_min_device_irq); 1260 irqptr = apic_irq_table[irqno]; 1261 ASSERT(irqptr != NULL); 1262 #ifdef DEBUG 1263 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1264 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: " 1265 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1266 #endif 1267 apic_vector_to_irq[vector] = (uchar_t)irqno; 1268 msi_vector_to_pirq[vector] = (uchar_t)irqno; 1269 1270 irqptr->airq_vector = vector; 1271 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1272 irqptr->airq_intin_no = (uchar_t)rcount; 1273 irqptr->airq_ipl = pri; 1274 irqptr->airq_origirq = (uchar_t)(inum + i); 1275 irqptr->airq_share_id = 0; 1276 irqptr->airq_mps_intr_index = MSI_INDEX; 1277 irqptr->airq_dip = dip; 1278 irqptr->airq_major = major; 1279 if (i == 0) /* they all bind to the same cpu */ 1280 cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno); 1281 else 1282 irqptr->airq_cpu = cpu; 1283 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x " 1284 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1285 (void *)irqptr->airq_dip, irqptr->airq_vector, 1286 irqptr->airq_origirq, pri)); 1287 } 1288 mutex_exit(&airq_mutex); 1289 return (rcount); 1290 } 1291 1292 /* 1293 * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type" 1294 */ 1295 int 1296 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri, 1297 int behavior) 1298 { 1299 int rcount, i, rc; 1300 major_t major; 1301 physdev_map_pirq_t map_irq; 1302 int busnum, devfn; 1303 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip); 1304 uint64_t table_base; 1305 pfn_t pfnum; 1306 1307 if (msix_p == NULL) { 1308 msix_p = pci_msix_init(dip); 1309 if (msix_p != NULL) { 1310 i_ddi_set_msix(dip, msix_p); 1311 } else { 1312 cmn_err(CE_WARN, "apic_alloc_msix_vectors()" 1313 " msix_init failed"); 1314 return (0); 1315 } 1316 } 1317 /* 1318 * Hypervisor wants PCI config space address of msix table base 1319 */ 1320 pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) & 1321 ~PFN_IS_FOREIGN_MFN; 1322 table_base = (uint64_t)((pfnum << PAGESHIFT) - msix_p->msix_tbl_offset | 1323 ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET)); 1324 /* 1325 * get PCI bus # and devfn from reg spec for device 1326 */ 1327 get_busdevfn(dip, &busnum, &devfn); 1328 1329 /* 1330 * Tell xen about this pci device 1331 */ 1332 if (!xen_manage_device(busnum, devfn)) 1333 return (0); 1334 mutex_enter(&airq_mutex); 1335 1336 if ((rcount = apic_navail_vector(dip, pri)) > count) 1337 rcount = count; 1338 else if (rcount == 0 || (rcount < count && 1339 behavior == DDI_INTR_ALLOC_STRICT)) { 1340 rcount = 0; 1341 goto out; 1342 } 1343 1344 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1345 for (i = 0; i < rcount; i++) { 1346 int irqno; 1347 uchar_t vector; 1348 apic_irq_t *irqptr; 1349 1350 /* 1351 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq 1352 */ 1353 map_irq.domid = DOMID_SELF; 1354 map_irq.type = MAP_PIRQ_TYPE_MSI; 1355 map_irq.index = -1; /* hypervisor auto allocates vector */ 1356 map_irq.pirq = -1; 1357 map_irq.bus = busnum; 1358 map_irq.devfn = devfn; 1359 map_irq.entry_nr = i; 1360 map_irq.table_base = table_base; 1361 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); 1362 irqno = map_irq.pirq; 1363 if (rc < 0) { 1364 mutex_exit(&airq_mutex); 1365 cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc); 1366 return (i); 1367 } 1368 if (irqno < 0) { 1369 mutex_exit(&airq_mutex); 1370 cmn_err(CE_NOTE, 1371 "!hypervisor not configured for MSI support"); 1372 xen_support_msi = -1; 1373 return (0); 1374 } 1375 /* 1376 * Find out what vector the hypervisor assigned 1377 */ 1378 vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i); 1379 1380 if (msi_allocate_irq(irqno) < 0) { 1381 mutex_exit(&airq_mutex); 1382 return (i); 1383 } 1384 apic_vector_to_irq[vector] = (uchar_t)irqno; 1385 msi_vector_to_pirq[vector] = (uchar_t)irqno; 1386 apic_max_device_irq = max(irqno, apic_max_device_irq); 1387 apic_min_device_irq = min(irqno, apic_min_device_irq); 1388 irqptr = apic_irq_table[irqno]; 1389 ASSERT(irqptr != NULL); 1390 irqptr->airq_vector = (uchar_t)vector; 1391 irqptr->airq_ipl = pri; 1392 irqptr->airq_origirq = (uchar_t)(inum + i); 1393 irqptr->airq_share_id = 0; 1394 irqptr->airq_mps_intr_index = MSIX_INDEX; 1395 irqptr->airq_dip = dip; 1396 irqptr->airq_major = major; 1397 irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */ 1398 } 1399 out: 1400 mutex_exit(&airq_mutex); 1401 return (rcount); 1402 } 1403 1404 1405 /* 1406 * This finds the apic_irq_t associated with the dip, ispec and type. 1407 * The entry should have already been freed, but it can not have been 1408 * reused yet since the hypervisor can not have reassigned the pirq since 1409 * we have not freed that yet. 1410 */ 1411 static apic_irq_t * 1412 msi_find_irq(dev_info_t *dip, struct intrspec *ispec) 1413 { 1414 apic_irq_t *irqp; 1415 int i; 1416 1417 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 1418 if ((irqp = apic_irq_table[i]) == NULL) 1419 continue; 1420 if ((irqp->airq_dip == dip) && 1421 (irqp->airq_origirq == ispec->intrspec_vec) && 1422 (irqp->airq_ipl == ispec->intrspec_pri)) { 1423 return (irqp); 1424 } 1425 } 1426 return (NULL); 1427 } 1428 1429 void 1430 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type) 1431 { 1432 int i, rc; 1433 physdev_unmap_pirq_t unmap_pirq; 1434 apic_irq_t *irqptr; 1435 struct intrspec ispec; 1436 1437 DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x " 1438 "count: %x pri: %x type: %x\n", 1439 (void *)dip, inum, count, pri, type)); 1440 1441 /* for MSI/X only */ 1442 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) 1443 return; 1444 1445 for (i = 0; i < count; i++) { 1446 DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x " 1447 "pri=0x%x count=0x%x\n", inum, pri, count)); 1448 ispec.intrspec_vec = inum + i; 1449 ispec.intrspec_pri = pri; 1450 if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) { 1451 cmn_err(CE_WARN, 1452 "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x", 1453 ddi_get_name(dip), ddi_get_name_addr(dip), 1454 (void *)dip, inum + i, pri); 1455 continue; 1456 } 1457 /* 1458 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq 1459 */ 1460 unmap_pirq.domid = DOMID_SELF; 1461 unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector]; 1462 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq); 1463 if (rc < 0) { 1464 cmn_err(CE_WARN, "unmap pirq failed"); 1465 return; 1466 } 1467 irqptr->airq_mps_intr_index = FREE_INDEX; 1468 apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ; 1469 } 1470 } 1471 1472 /* 1473 * The hypervisor doesn't permit access to local apics directly 1474 */ 1475 /* ARGSUSED */ 1476 uint32_t * 1477 mapin_apic(uint32_t addr, size_t len, int flags) 1478 { 1479 /* 1480 * Return a pointer to a memory area to fake out the 1481 * probe code that wants to read apic registers. 1482 * The dummy values will end up being ignored by xen 1483 * later on when they are used anyway. 1484 */ 1485 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1486 return (xen_psm_dummy_apic); 1487 } 1488 1489 /* ARGSUSED */ 1490 uint32_t * 1491 mapin_ioapic(uint32_t addr, size_t len, int flags) 1492 { 1493 /* 1494 * Return non-null here to fake out configure code that calls this. 1495 * The i86xpv platform will not reference through the returned value.. 1496 */ 1497 return ((uint32_t *)0x1); 1498 } 1499 1500 /* ARGSUSED */ 1501 void 1502 mapout_apic(caddr_t addr, size_t len) 1503 { 1504 } 1505 1506 /* ARGSUSED */ 1507 void 1508 mapout_ioapic(caddr_t addr, size_t len) 1509 { 1510 } 1511 1512 uint32_t 1513 ioapic_read(int apic_ix, uint32_t reg) 1514 { 1515 physdev_apic_t apic; 1516 1517 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1518 apic.reg = reg; 1519 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1520 panic("read ioapic %d reg %d failed", apic_ix, reg); 1521 return (apic.value); 1522 } 1523 1524 void 1525 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1526 { 1527 physdev_apic_t apic; 1528 1529 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1530 apic.reg = reg; 1531 apic.value = value; 1532 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1533 panic("write ioapic %d reg %d failed", apic_ix, reg); 1534 } 1535 1536 /* 1537 * This function was added as part of x2APIC support in pcplusmp. 1538 */ 1539 void 1540 ioapic_write_eoi(int apic_ix, uint32_t value) 1541 { 1542 physdev_apic_t apic; 1543 1544 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1545 apic.reg = APIC_IO_EOI; 1546 apic.value = value; 1547 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1548 panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix); 1549 } 1550 1551 /* 1552 * This function was added as part of x2APIC support in pcplusmp to resolve 1553 * undefined symbol in xpv_psm. 1554 */ 1555 void 1556 x2apic_update_psm() 1557 { 1558 } 1559 1560 /* 1561 * This function was added as part of x2APIC support in pcplusmp to resolve 1562 * undefined symbol in xpv_psm. 1563 */ 1564 void 1565 apic_ret() 1566 { 1567 } 1568 1569 /* 1570 * Call rebind to do the actual programming. 1571 */ 1572 int 1573 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1574 { 1575 apic_irq_t *irqptr; 1576 struct ioapic_reprogram_data *drep = NULL; 1577 int rv, cpu; 1578 cpuset_t cpus; 1579 1580 if (deferred) { 1581 drep = (struct ioapic_reprogram_data *)p; 1582 ASSERT(drep != NULL); 1583 irqptr = drep->irqp; 1584 } else { 1585 irqptr = (apic_irq_t *)p; 1586 } 1587 ASSERT(irqptr != NULL); 1588 /* 1589 * Set cpu based on xen idea of online cpu's not apic tables. 1590 * Note that xen ignores/sets to it's own preferred value the 1591 * target cpu field when programming ioapic anyway. 1592 */ 1593 if (irqptr->airq_mps_intr_index == MSI_INDEX) 1594 cpu = irqptr->airq_cpu; /* MSI cpus are already set */ 1595 else { 1596 cpu = xen_psm_bind_intr(irq); 1597 irqptr->airq_cpu = cpu; 1598 } 1599 if (cpu == IRQ_UNBOUND) { 1600 CPUSET_ZERO(cpus); 1601 CPUSET_OR(cpus, xen_psm_cpus_online); 1602 } else { 1603 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1604 } 1605 rv = apic_rebind(irqptr, cpu, drep); 1606 if (rv) { 1607 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1608 cpu = 0; 1609 irqptr->airq_cpu = cpu; 1610 rv = apic_rebind(irqptr, cpu, drep); 1611 } 1612 /* 1613 * If rebind successful bind the irq to an event channel 1614 */ 1615 if (rv == 0) { 1616 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1617 CPUSET_FIND(cpus, cpu); 1618 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1619 } 1620 return (rv); 1621 } 1622 1623 /* 1624 * Allocate a new vector for the given irq 1625 */ 1626 /* ARGSUSED */ 1627 uchar_t 1628 apic_modify_vector(uchar_t vector, int irq) 1629 { 1630 return (apic_allocate_vector(0, irq, 0)); 1631 } 1632 1633 /* 1634 * The rest of the file is just generic psm module boilerplate 1635 */ 1636 1637 static struct psm_ops xen_psm_ops = { 1638 xen_psm_probe, /* psm_probe */ 1639 1640 xen_psm_softinit, /* psm_init */ 1641 xen_psm_picinit, /* psm_picinit */ 1642 xen_psm_intr_enter, /* psm_intr_enter */ 1643 xen_psm_intr_exit, /* psm_intr_exit */ 1644 xen_psm_setspl, /* psm_setspl */ 1645 xen_psm_addspl, /* psm_addspl */ 1646 xen_psm_delspl, /* psm_delspl */ 1647 xen_psm_disable_intr, /* psm_disable_intr */ 1648 xen_psm_enable_intr, /* psm_enable_intr */ 1649 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1650 (void (*)(int))NULL, /* psm_set_softintr */ 1651 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1652 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1653 1654 xen_psm_clkinit, /* psm_clkinit */ 1655 xen_psm_get_clockirq, /* psm_get_clockirq */ 1656 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1657 xpv_gethrtime, /* psm_gethrtime */ 1658 1659 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1660 xen_psm_cpu_start, /* psm_cpu_start */ 1661 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1662 xen_psm_shutdown, /* psm_shutdown */ 1663 xen_psm_get_ipivect, /* psm_get_ipivect */ 1664 xen_psm_send_ipi, /* psm_send_ipi */ 1665 1666 xen_psm_translate_irq, /* psm_translate_irq */ 1667 1668 (void (*)(int, char *))NULL, /* psm_notify_error */ 1669 (void (*)(int msg))NULL, /* psm_notify_func */ 1670 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1671 xen_psm_timer_enable, /* psm_timer_enable */ 1672 xen_psm_timer_disable, /* psm_timer_disable */ 1673 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1674 (void (*)(int, int))NULL, /* psm_preshutdown */ 1675 xen_intr_ops, /* Advanced DDI Interrupt framework */ 1676 (int (*)(psm_state_request_t *))NULL, /* psm_state */ 1677 (int (*)(psm_cpu_request_t *))NULL, /* psm_cpu_ops */ 1678 1679 (int (*)(void))NULL, /* psm_get_pir_ipivect */ 1680 (void (*)(processorid_t))NULL, /* psm_send_pir_ipi */ 1681 (void (*)(processorid_t, boolean_t))NULL /* psm_cmci_setup */ 1682 }; 1683 1684 static struct psm_info xen_psm_info = { 1685 PSM_INFO_VER01_5, /* version */ 1686 PSM_OWN_EXCLUSIVE, /* ownership */ 1687 &xen_psm_ops, /* operation */ 1688 "xVM_psm", /* machine name */ 1689 "platform module" /* machine descriptions */ 1690 }; 1691 1692 static void *xen_psm_hdlp; 1693 1694 int 1695 _init(void) 1696 { 1697 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1698 } 1699 1700 int 1701 _fini(void) 1702 { 1703 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1704 } 1705 1706 int 1707 _info(struct modinfo *modinfop) 1708 { 1709 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1710 } 1711