1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #define PSMI_1_5 28 29 #include <sys/mutex.h> 30 #include <sys/types.h> 31 #include <sys/time.h> 32 #include <sys/clock.h> 33 #include <sys/machlock.h> 34 #include <sys/smp_impldefs.h> 35 #include <sys/uadmin.h> 36 #include <sys/promif.h> 37 #include <sys/psm.h> 38 #include <sys/psm_common.h> 39 #include <sys/atomic.h> 40 #include <sys/apic.h> 41 #include <sys/archsystm.h> 42 #include <sys/mach_intr.h> 43 #include <sys/hypervisor.h> 44 #include <sys/evtchn_impl.h> 45 #include <sys/modctl.h> 46 #include <sys/trap.h> 47 #include <sys/panic.h> 48 49 #include <xen/public/vcpu.h> 50 #include <xen/public/physdev.h> 51 52 53 /* 54 * Global Data 55 */ 56 57 int xen_psm_verbose = 0; 58 59 /* As of now we don't support x2apic in xVM */ 60 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 61 int apic_error = 0; 62 int apic_verbose = 0; 63 cpuset_t apic_cpumask; 64 int apic_forceload = 0; 65 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 66 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 67 }; 68 uchar_t apic_ipltopri[MAXIPL + 1]; 69 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 70 uint_t apic_picinit_called; 71 apic_cpus_info_t *apic_cpus; 72 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 73 /* use to make sure only one cpu handles the nmi */ 74 static lock_t xen_psm_nmi_lock; 75 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 76 int xen_psm_panic_on_nmi = 0; 77 int xen_psm_num_nmis = 0; 78 79 cpuset_t xen_psm_cpus_online; /* online cpus */ 80 int xen_psm_ncpus = 1; /* cpu count */ 81 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 82 83 /* 84 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't 85 * support MSI at all. Change this initialization to zero when MSI is 86 * supported. 87 */ 88 int xen_support_msi = -1; 89 90 static int xen_clock_irq = INVALID_IRQ; 91 92 /* flag definitions for xen_psm_verbose */ 93 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 94 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 95 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 96 97 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 98 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 99 cmn_err fmt; 100 101 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 102 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 103 prom_printf fmt; 104 105 /* 106 * Dummy apic array to point common routines at that want to do some apic 107 * manipulation. Xen doesn't allow guest apic access so we point at these 108 * memory locations to fake out those who want to do apic fiddling. 109 */ 110 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 111 112 static struct psm_info xen_psm_info; 113 static void xen_psm_setspl(int); 114 115 static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int); 116 117 /* 118 * Local support routines 119 */ 120 121 /* 122 * Select vcpu to bind xen virtual device interrupt to. 123 */ 124 /*ARGSUSED*/ 125 int 126 xen_psm_bind_intr(int irq) 127 { 128 int bind_cpu, test_cpu; 129 apic_irq_t *irqptr; 130 131 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 132 return (IRQ_UNBOUND); 133 if (irq <= APIC_MAX_VECTOR) 134 irqptr = apic_irq_table[irq]; 135 else 136 irqptr = NULL; 137 if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) { 138 bind_cpu = irqptr->airq_cpu; 139 test_cpu = bind_cpu & ~IRQ_USER_BOUND; 140 if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu)) 141 bind_cpu = 0; 142 goto done; 143 } 144 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 145 do { 146 bind_cpu = xen_psm_next_bind_cpu++; 147 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 148 xen_psm_next_bind_cpu = 0; 149 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 150 } else { 151 bind_cpu = 0; 152 } 153 done: 154 return (bind_cpu); 155 } 156 157 /* 158 * Autoconfiguration Routines 159 */ 160 161 static int 162 xen_psm_probe(void) 163 { 164 int ret = PSM_SUCCESS; 165 166 if (DOMAIN_IS_INITDOMAIN(xen_info)) 167 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 168 return (ret); 169 } 170 171 static void 172 xen_psm_softinit(void) 173 { 174 /* LINTED logical expression always true: op "||" */ 175 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 176 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 177 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 178 apic_init_common(); 179 } 180 } 181 182 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 183 184 /*ARGSUSED*/ 185 static int 186 xen_psm_clkinit(int hertz) 187 { 188 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 189 extern int dosynctodr; 190 191 /* 192 * domU cannot set the TOD hardware, fault the TOD clock now to 193 * indicate that and turn off attempts to sync TOD hardware 194 * with the hires timer. 195 */ 196 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 197 mutex_enter(&tod_lock); 198 (void) tod_fault(TOD_RDONLY, 0); 199 dosynctodr = 0; 200 mutex_exit(&tod_lock); 201 } 202 /* 203 * The hypervisor provides a timer based on the local APIC timer. 204 * The interface supports requests of nanosecond resolution. 205 * A common frequency of the apic clock is 100 Mhz which 206 * gives a resolution of 10 nsec per tick. What we would really like 207 * is a way to get the ns per tick value from xen. 208 * XXPV - This is an assumption that needs checking and may change 209 */ 210 return (XEN_NSEC_PER_TICK); 211 } 212 213 static void 214 xen_psm_hrtimeinit(void) 215 { 216 extern int gethrtime_hires; 217 gethrtime_hires = 1; 218 } 219 220 /* xen_psm NMI handler */ 221 /*ARGSUSED*/ 222 static void 223 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 224 { 225 xen_psm_num_nmis++; 226 227 if (!lock_try(&xen_psm_nmi_lock)) 228 return; 229 230 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 231 debug_enter("NMI received: entering kmdb\n"); 232 } else if (xen_psm_panic_on_nmi) { 233 /* Keep panic from entering kmdb. */ 234 nopanicdebug = 1; 235 panic("NMI received\n"); 236 } else { 237 /* 238 * prom_printf is the best shot we have of something which is 239 * problem free from high level/NMI type of interrupts 240 */ 241 prom_printf("NMI received\n"); 242 } 243 244 lock_clear(&xen_psm_nmi_lock); 245 } 246 247 static void 248 xen_psm_picinit() 249 { 250 int cpu, irqno; 251 cpuset_t cpus; 252 253 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 254 /* set a flag so we know we have run xen_psm_picinit() */ 255 apic_picinit_called = 1; 256 LOCK_INIT_CLEAR(&apic_ioapic_lock); 257 258 /* XXPV - do we need to do this? */ 259 picsetup(); /* initialise the 8259 */ 260 261 /* enable apic mode if imcr present */ 262 /* XXPV - do we need to do this either? */ 263 if (apic_imcrp) { 264 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 265 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 266 } 267 268 ioapic_init_intr(IOAPIC_NOMASK); 269 /* 270 * We never called xen_psm_addspl() when the SCI 271 * interrupt was added because that happened before the 272 * PSM module was loaded. Fix that up here by doing 273 * any missed operations (e.g. bind to CPU) 274 */ 275 if ((irqno = apic_sci_vect) > 0) { 276 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 277 CPUSET_ZERO(cpus); 278 CPUSET_OR(cpus, xen_psm_cpus_online); 279 } else { 280 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 281 } 282 ec_set_irq_affinity(irqno, cpus); 283 apic_irq_table[irqno]->airq_temp_cpu = 284 (uchar_t)(cpu & ~IRQ_USER_BOUND); 285 ec_enable_irq(irqno); 286 } 287 } 288 289 /* add nmi handler - least priority nmi handler */ 290 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 291 292 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 293 "xVM_psm NMI handler", (caddr_t)NULL)) 294 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 295 } 296 297 298 /* 299 * generates an interprocessor interrupt to another CPU 300 */ 301 static void 302 xen_psm_send_ipi(int cpun, int ipl) 303 { 304 ulong_t flag = intr_clear(); 305 306 ec_send_ipi(ipl, cpun); 307 intr_restore(flag); 308 } 309 310 /*ARGSUSED*/ 311 static int 312 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 313 { 314 int cpu, ret; 315 cpuset_t cpus; 316 317 /* 318 * We are called at splhi() so we can't call anything that might end 319 * up trying to context switch. 320 */ 321 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 322 DOMAIN_IS_INITDOMAIN(xen_info)) { 323 /* 324 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 325 */ 326 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 327 } else { 328 /* 329 * Set priority/affinity/enable for non PIRQs 330 */ 331 ret = ec_set_irq_priority(irqno, ipl); 332 ASSERT(ret == 0); 333 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 334 CPUSET_ZERO(cpus); 335 CPUSET_OR(cpus, xen_psm_cpus_online); 336 } else { 337 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 338 } 339 ec_set_irq_affinity(irqno, cpus); 340 ec_enable_irq(irqno); 341 } 342 return (ret); 343 } 344 345 /* 346 * Acquire ownership of this irq on this cpu 347 */ 348 void 349 xen_psm_acquire_irq(int irq) 350 { 351 ulong_t flags; 352 int cpuid; 353 354 /* 355 * If the irq is currently being serviced by another cpu 356 * we busy-wait for the other cpu to finish. Take any 357 * pending interrupts before retrying. 358 */ 359 do { 360 flags = intr_clear(); 361 cpuid = ec_block_irq(irq); 362 intr_restore(flags); 363 } while (cpuid != CPU->cpu_id); 364 } 365 366 /*ARGSUSED*/ 367 static int 368 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 369 { 370 apic_irq_t *irqptr; 371 int err = PSM_SUCCESS; 372 373 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 374 DOMAIN_IS_INITDOMAIN(xen_info)) { 375 irqptr = apic_irq_table[irqno]; 376 /* 377 * unbind if no more sharers of this irq/evtchn 378 */ 379 if (irqptr->airq_share == 1) { 380 xen_psm_acquire_irq(irqno); 381 ec_unbind_irq(irqno); 382 } 383 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 384 /* 385 * If still in use reset priority 386 */ 387 if (!err && irqptr->airq_share != 0) { 388 err = ec_set_irq_priority(irqno, max_ipl); 389 return (err); 390 } 391 } else { 392 xen_psm_acquire_irq(irqno); 393 ec_unbind_irq(irqno); 394 } 395 return (err); 396 } 397 398 static processorid_t 399 xen_psm_get_next_processorid(processorid_t id) 400 { 401 if (id == -1) 402 return (0); 403 404 for (id++; id < NCPU; id++) { 405 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 406 case 0: /* yeah, that one's there */ 407 return (id); 408 default: 409 case X_EINVAL: /* out of range */ 410 return (-1); 411 case X_ENOENT: /* not present in the domain */ 412 /* 413 * It's not clear that we -need- to keep looking 414 * at this point, if, e.g., we can guarantee 415 * the hypervisor always keeps a contiguous range 416 * of vcpus around this is equivalent to "out of range". 417 * 418 * But it would be sad to miss a vcpu we're 419 * supposed to be using .. 420 */ 421 break; 422 } 423 } 424 425 return (-1); 426 } 427 428 /* 429 * XXPV - undo the start cpu op change; return to ignoring this value 430 * - also tweak error handling in main startup loop 431 */ 432 /*ARGSUSED*/ 433 static int 434 xen_psm_cpu_start(processorid_t id, caddr_t arg) 435 { 436 int ret; 437 438 ASSERT(id > 0); 439 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 440 ec_bind_cpu_ipis(id); 441 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 442 if ((ret = xen_vcpu_up(id)) == 0) 443 xen_psm_ncpus++; 444 else 445 ret = EINVAL; 446 return (ret); 447 } 448 449 /* 450 * Allocate an irq for inter cpu signaling 451 */ 452 /*ARGSUSED*/ 453 static int 454 xen_psm_get_ipivect(int ipl, int type) 455 { 456 return (ec_bind_ipi_to_irq(ipl, 0)); 457 } 458 459 /*ARGSUSED*/ 460 static int 461 xen_psm_get_clockirq(int ipl) 462 { 463 if (xen_clock_irq != INVALID_IRQ) 464 return (xen_clock_irq); 465 466 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 467 return (xen_clock_irq); 468 } 469 470 /*ARGSUSED*/ 471 static void 472 xen_psm_shutdown(int cmd, int fcn) 473 { 474 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 475 476 switch (cmd) { 477 case A_SHUTDOWN: 478 switch (fcn) { 479 case AD_BOOT: 480 case AD_IBOOT: 481 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 482 break; 483 case AD_POWEROFF: 484 /* fall through if domU or if poweroff fails */ 485 if (DOMAIN_IS_INITDOMAIN(xen_info)) 486 if (apic_enable_acpi) 487 (void) acpi_poweroff(); 488 /* FALLTHRU */ 489 case AD_HALT: 490 default: 491 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 492 break; 493 } 494 break; 495 case A_REBOOT: 496 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 497 break; 498 default: 499 return; 500 } 501 } 502 503 504 static int 505 xen_psm_translate_irq(dev_info_t *dip, int irqno) 506 { 507 if (dip == NULL) { 508 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 509 " dip = NULL\n", irqno)); 510 return (irqno); 511 } 512 return (irqno); 513 } 514 515 /* 516 * xen_psm_intr_enter() acks the event that triggered the interrupt and 517 * returns the new priority level, 518 */ 519 /*ARGSUSED*/ 520 static int 521 xen_psm_intr_enter(int ipl, int *vector) 522 { 523 int newipl; 524 uint_t intno; 525 cpu_t *cpu = CPU; 526 527 intno = (*vector); 528 529 ASSERT(intno < NR_IRQS); 530 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 531 532 ec_clear_irq(intno); 533 534 newipl = autovect[intno].avh_hi_pri; 535 if (newipl == 0) { 536 /* 537 * (newipl == 0) means we have no service routines for this 538 * vector. We will treat this as a spurious interrupt. 539 * We have cleared the pending bit already, clear the event 540 * mask and return a spurious interrupt. This case can happen 541 * when an interrupt delivery is racing with the removal of 542 * of the service routine for that interrupt. 543 */ 544 ec_unmask_irq(intno); 545 newipl = -1; /* flag spurious interrupt */ 546 } else if (newipl <= cpu->cpu_pri) { 547 /* 548 * (newipl <= cpu->cpu_pri) means that we must be trying to 549 * service a vector that was shared with a higher priority 550 * isr. The higher priority handler has been removed and 551 * we need to service this int. We can't return a lower 552 * priority than current cpu priority. Just synthesize a 553 * priority to return that should be acceptable. 554 */ 555 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 556 } 557 return (newipl); 558 } 559 560 561 /* 562 * xen_psm_intr_exit() restores the old interrupt 563 * priority level after processing an interrupt. 564 * It is called with interrupts disabled, and does not enable interrupts. 565 */ 566 /* ARGSUSED */ 567 static void 568 xen_psm_intr_exit(int ipl, int vector) 569 { 570 ec_try_unmask_irq(vector); 571 xen_psm_setspl(ipl); 572 } 573 574 intr_exit_fn_t 575 psm_intr_exit_fn(void) 576 { 577 return (xen_psm_intr_exit); 578 } 579 580 /* 581 * Check if new ipl level allows delivery of previously unserviced events 582 */ 583 static void 584 xen_psm_setspl(int ipl) 585 { 586 struct cpu *cpu = CPU; 587 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 588 uint16_t pending; 589 590 ASSERT(vci->evtchn_upcall_mask != 0); 591 592 /* 593 * If new ipl level will enable any pending interrupts, setup so the 594 * upcoming sti will cause us to get an upcall. 595 */ 596 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 597 if (pending) { 598 int i; 599 ulong_t pending_sels = 0; 600 volatile ulong_t *selp; 601 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 602 603 for (i = bsrw_insn(pending); i > ipl; i--) 604 pending_sels |= cpe->pending_sel[i]; 605 ASSERT(pending_sels); 606 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 607 atomic_or_ulong(selp, pending_sels); 608 vci->evtchn_upcall_pending = 1; 609 } 610 } 611 612 /* 613 * This function provides external interface to the nexus for all 614 * functionality related to the new DDI interrupt framework. 615 * 616 * Input: 617 * dip - pointer to the dev_info structure of the requested device 618 * hdlp - pointer to the internal interrupt handle structure for the 619 * requested interrupt 620 * intr_op - opcode for this call 621 * result - pointer to the integer that will hold the result to be 622 * passed back if return value is PSM_SUCCESS 623 * 624 * Output: 625 * return value is either PSM_SUCCESS or PSM_FAILURE 626 */ 627 int 628 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 629 psm_intr_op_t intr_op, int *result) 630 { 631 int cap; 632 int err; 633 int new_priority; 634 apic_irq_t *irqp; 635 struct intrspec *ispec; 636 637 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 638 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 639 640 switch (intr_op) { 641 case PSM_INTR_OP_CHECK_MSI: 642 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 643 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 644 DDI_INTR_TYPE_MSIX); 645 break; 646 } 647 /* 648 * Check MSI/X is supported or not at APIC level and 649 * masked off the MSI/X bits in hdlp->ih_type if not 650 * supported before return. If MSI/X is supported, 651 * leave the ih_type unchanged and return. 652 * 653 * hdlp->ih_type passed in from the nexus has all the 654 * interrupt types supported by the device. 655 */ 656 if (xen_support_msi == 0) { 657 /* 658 * if xen_support_msi is not set, call 659 * apic_check_msi_support() to check whether msi 660 * is supported first 661 */ 662 if (apic_check_msi_support() == PSM_SUCCESS) 663 xen_support_msi = 1; 664 else 665 xen_support_msi = -1; 666 } 667 if (xen_support_msi == 1) 668 *result = hdlp->ih_type; 669 else 670 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 671 DDI_INTR_TYPE_MSIX); 672 break; 673 case PSM_INTR_OP_ALLOC_VECTORS: 674 *result = apic_alloc_vectors(dip, hdlp->ih_inum, 675 hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type, 676 (int)(uintptr_t)hdlp->ih_scratch2); 677 break; 678 case PSM_INTR_OP_FREE_VECTORS: 679 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 680 hdlp->ih_pri, hdlp->ih_type); 681 break; 682 case PSM_INTR_OP_NAVAIL_VECTORS: 683 /* 684 * XXPV - maybe we should make this be: 685 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 686 */ 687 if (DOMAIN_IS_INITDOMAIN(xen_info)) 688 *result = APIC_VECTOR_PER_IPL; 689 else 690 *result = 1; 691 break; 692 case PSM_INTR_OP_XLATE_VECTOR: 693 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 694 if (ispec->intrspec_vec >= PIRQ_BASE && 695 ispec->intrspec_vec < NR_PIRQS && 696 DOMAIN_IS_INITDOMAIN(xen_info)) { 697 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 698 } else { 699 *result = ispec->intrspec_vec; 700 } 701 break; 702 case PSM_INTR_OP_GET_PENDING: 703 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 704 *result = ec_pending_irq(hdlp->ih_vector); 705 break; 706 case PSM_INTR_OP_CLEAR_MASK: 707 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 708 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 709 return (PSM_FAILURE); 710 ec_enable_irq(hdlp->ih_vector); 711 break; 712 case PSM_INTR_OP_SET_MASK: 713 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 714 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 715 return (PSM_FAILURE); 716 ec_disable_irq(hdlp->ih_vector); 717 break; 718 case PSM_INTR_OP_GET_CAP: 719 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 720 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 721 cap |= DDI_INTR_FLAG_MASKABLE; 722 *result = cap; 723 break; 724 case PSM_INTR_OP_GET_SHARED: 725 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 726 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 727 return (PSM_FAILURE); 728 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 729 == NULL) 730 return (PSM_FAILURE); 731 *result = irqp->airq_share ? 1: 0; 732 } else { 733 return (PSM_FAILURE); 734 } 735 break; 736 case PSM_INTR_OP_SET_PRI: 737 new_priority = *(int *)result; 738 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 739 if (err != 0) 740 return (PSM_FAILURE); 741 break; 742 case PSM_INTR_OP_GET_INTR: 743 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 744 return (PSM_FAILURE); 745 /* 746 * The interrupt handle given here has been allocated 747 * specifically for this command, and ih_private carries 748 * a pointer to a apic_get_intr_t. 749 */ 750 if (apic_get_vector_intr_info( 751 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 752 return (PSM_FAILURE); 753 break; 754 case PSM_INTR_OP_SET_CAP: 755 /* FALLTHRU */ 756 default: 757 return (PSM_FAILURE); 758 } 759 return (PSM_SUCCESS); 760 } 761 762 static void 763 xen_psm_rebind_irq(int irq) 764 { 765 cpuset_t ncpu; 766 processorid_t newcpu; 767 apic_irq_t *irqptr; 768 769 newcpu = xen_psm_bind_intr(irq); 770 if (newcpu == IRQ_UNBOUND) { 771 CPUSET_ZERO(ncpu); 772 CPUSET_OR(ncpu, xen_psm_cpus_online); 773 } else { 774 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 775 } 776 ec_set_irq_affinity(irq, ncpu); 777 if (irq <= APIC_MAX_VECTOR) { 778 irqptr = apic_irq_table[irq]; 779 ASSERT(irqptr != NULL); 780 irqptr->airq_temp_cpu = (uchar_t)newcpu; 781 } 782 } 783 784 /* 785 * Disable all device interrupts for the given cpu. 786 * High priority interrupts are not disabled and will still be serviced. 787 */ 788 static int 789 xen_psm_disable_intr(processorid_t cpun) 790 { 791 int irq; 792 793 /* 794 * Can't offline VCPU 0 on this hypervisor. There's no reason 795 * anyone would want to given that the CPUs are virtual. Also note 796 * that the hypervisor requires suspend/resume to be on VCPU 0. 797 */ 798 if (cpun == 0) 799 return (PSM_FAILURE); 800 801 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 802 for (irq = 0; irq < NR_IRQS; irq++) { 803 if (!ec_irq_needs_rebind(irq, cpun)) 804 continue; 805 xen_psm_rebind_irq(irq); 806 } 807 return (PSM_SUCCESS); 808 } 809 810 static void 811 xen_psm_enable_intr(processorid_t cpun) 812 { 813 int irq; 814 815 if (cpun == 0) 816 return; 817 818 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 819 820 /* 821 * Rebalance device interrupts among online processors 822 */ 823 for (irq = 0; irq < NR_IRQS; irq++) { 824 if (!ec_irq_rebindable(irq)) 825 continue; 826 xen_psm_rebind_irq(irq); 827 } 828 829 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 830 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 831 } 832 } 833 834 static int 835 xen_psm_post_cpu_start() 836 { 837 processorid_t cpun; 838 839 cpun = psm_get_cpu_id(); 840 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 841 /* 842 * Non-virtualized environments can call psm_post_cpu_start 843 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. 844 * xen_psm_post_cpu_start() is only called from boot. 845 */ 846 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; 847 } 848 return (PSM_SUCCESS); 849 } 850 851 /* 852 * This function will reprogram the timer. 853 * 854 * When in oneshot mode the argument is the absolute time in future at which to 855 * generate the interrupt. 856 * 857 * When in periodic mode, the argument is the interval at which the 858 * interrupts should be generated. There is no need to support the periodic 859 * mode timer change at this time. 860 * 861 * Note that we must be careful to convert from hrtime to Xen system time (see 862 * xpv_timestamp.c). 863 */ 864 static void 865 xen_psm_timer_reprogram(hrtime_t timer_req) 866 { 867 hrtime_t now, timer_new, time_delta, xen_time; 868 ulong_t flags; 869 870 flags = intr_clear(); 871 /* 872 * We should be called from high PIL context (CBE_HIGH_PIL), 873 * so kpreempt is disabled. 874 */ 875 876 now = xpv_gethrtime(); 877 xen_time = xpv_getsystime(); 878 if (timer_req <= now) { 879 /* 880 * requested to generate an interrupt in the past 881 * generate an interrupt as soon as possible 882 */ 883 time_delta = XEN_NSEC_PER_TICK; 884 } else 885 time_delta = timer_req - now; 886 887 timer_new = xen_time + time_delta; 888 if (HYPERVISOR_set_timer_op(timer_new) != 0) 889 panic("can't set hypervisor timer?"); 890 intr_restore(flags); 891 } 892 893 /* 894 * This function will enable timer interrupts. 895 */ 896 static void 897 xen_psm_timer_enable(void) 898 { 899 ec_unmask_irq(xen_clock_irq); 900 } 901 902 /* 903 * This function will disable timer interrupts on the current cpu. 904 */ 905 static void 906 xen_psm_timer_disable(void) 907 { 908 (void) ec_block_irq(xen_clock_irq); 909 /* 910 * If the clock irq is pending on this cpu then we need to 911 * clear the pending interrupt. 912 */ 913 ec_unpend_irq(xen_clock_irq); 914 } 915 916 /* 917 * 918 * The following functions are in the platform specific file so that they 919 * can be different functions depending on whether we are running on 920 * bare metal or a hypervisor. 921 */ 922 923 /* 924 * Allocate a free vector for irq at ipl. 925 */ 926 /* ARGSUSED */ 927 uchar_t 928 apic_allocate_vector(int ipl, int irq, int pri) 929 { 930 physdev_irq_t irq_op; 931 uchar_t vector; 932 933 irq_op.irq = irq; 934 935 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 936 panic("Hypervisor alloc vector failed"); 937 vector = irq_op.vector; 938 /* 939 * No need to worry about vector colliding with our reserved vectors 940 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 941 * generated traps and handle them properly. 942 */ 943 apic_vector_to_irq[vector] = (uchar_t)irq; 944 return (vector); 945 } 946 947 /* Mark vector as not being used by any irq */ 948 void 949 apic_free_vector(uchar_t vector) 950 { 951 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 952 } 953 954 /* 955 * This function allocate "count" vector(s) for the given "dip/pri/type" 956 */ 957 static int 958 apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, 959 int behavior) 960 { 961 int rcount, i; 962 uchar_t vector, cpu; 963 int irqno; 964 major_t major; 965 apic_irq_t *irqptr; 966 967 /* only supports MSI at the moment, will add MSI-X support later */ 968 if (type != DDI_INTR_TYPE_MSI) 969 return (0); 970 971 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d " 972 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 973 (void *)dip, type, inum, pri, count, behavior)); 974 975 if (count > 1) { 976 if (behavior == DDI_INTR_ALLOC_STRICT && 977 (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) 978 return (0); 979 980 if (apic_multi_msi_enable == 0) 981 count = 1; 982 else if (count > apic_multi_msi_max) 983 count = apic_multi_msi_max; 984 } 985 986 /* 987 * XXPV - metal version takes all vectors avail at given pri. 988 * Why do that? For now just allocate count vectors. 989 */ 990 rcount = count; 991 992 mutex_enter(&airq_mutex); 993 994 /* 995 * XXPV - currently the hypervisor does not support MSI at all. 996 * It doesn't return consecutive vectors. This code is a first 997 * cut for the (future) time that MSI is supported. 998 */ 999 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1000 for (i = 0; i < rcount; i++) { 1001 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == 1002 INVALID_IRQ) { 1003 mutex_exit(&airq_mutex); 1004 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1005 "apic_allocate_irq failed\n")); 1006 return (i); 1007 } 1008 apic_max_device_irq = max(irqno, apic_max_device_irq); 1009 apic_min_device_irq = min(irqno, apic_min_device_irq); 1010 irqptr = apic_irq_table[irqno]; 1011 vector = apic_allocate_vector(pri, irqno, 0); 1012 apic_vector_to_irq[vector] = (uchar_t)irqno; 1013 #ifdef DEBUG 1014 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1015 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1016 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1017 #endif 1018 1019 irqptr->airq_vector = vector; 1020 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1021 irqptr->airq_intin_no = (uchar_t)rcount; 1022 irqptr->airq_ipl = pri; 1023 irqptr->airq_origirq = (uchar_t)(inum + i); 1024 irqptr->airq_share_id = 0; 1025 irqptr->airq_mps_intr_index = MSI_INDEX; 1026 irqptr->airq_dip = dip; 1027 irqptr->airq_major = major; 1028 if (i == 0) /* they all bound to the same cpu */ 1029 cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno, 1030 0xff, 0xff); 1031 else 1032 irqptr->airq_cpu = cpu; 1033 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x " 1034 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1035 (void *)irqptr->airq_dip, irqptr->airq_vector, 1036 irqptr->airq_origirq, pri)); 1037 } 1038 mutex_exit(&airq_mutex); 1039 return (rcount); 1040 } 1041 1042 /* 1043 * The hypervisor doesn't permit access to local apics directly 1044 */ 1045 /* ARGSUSED */ 1046 uint32_t * 1047 mapin_apic(uint32_t addr, size_t len, int flags) 1048 { 1049 /* 1050 * Return a pointer to a memory area to fake out the 1051 * probe code that wants to read apic registers. 1052 * The dummy values will end up being ignored by xen 1053 * later on when they are used anyway. 1054 */ 1055 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1056 return (xen_psm_dummy_apic); 1057 } 1058 1059 /* ARGSUSED */ 1060 uint32_t * 1061 mapin_ioapic(uint32_t addr, size_t len, int flags) 1062 { 1063 /* 1064 * Return non-null here to fake out configure code that calls this. 1065 * The i86xpv platform will not reference through the returned value.. 1066 */ 1067 return ((uint32_t *)0x1); 1068 } 1069 1070 /* ARGSUSED */ 1071 void 1072 mapout_apic(caddr_t addr, size_t len) 1073 { 1074 } 1075 1076 /* ARGSUSED */ 1077 void 1078 mapout_ioapic(caddr_t addr, size_t len) 1079 { 1080 } 1081 1082 uint32_t 1083 ioapic_read(int apic_ix, uint32_t reg) 1084 { 1085 physdev_apic_t apic; 1086 1087 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1088 apic.reg = reg; 1089 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1090 panic("read ioapic %d reg %d failed", apic_ix, reg); 1091 return (apic.value); 1092 } 1093 1094 void 1095 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1096 { 1097 physdev_apic_t apic; 1098 1099 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1100 apic.reg = reg; 1101 apic.value = value; 1102 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1103 panic("write ioapic %d reg %d failed", apic_ix, reg); 1104 } 1105 1106 /* 1107 * This function was added as part of x2APIC support in pcplusmp. 1108 */ 1109 void 1110 ioapic_write_eoi(int apic_ix, uint32_t value) 1111 { 1112 physdev_apic_t apic; 1113 1114 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1115 apic.reg = APIC_IO_EOI; 1116 apic.value = value; 1117 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1118 panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix); 1119 } 1120 1121 /* 1122 * This function was added as part of x2APIC support in pcplusmp to resolve 1123 * undefined symbol in xpv_psm. 1124 */ 1125 void 1126 x2apic_update_psm() 1127 { 1128 } 1129 1130 /* 1131 * This function was added as part of x2APIC support in pcplusmp to resolve 1132 * undefined symbol in xpv_psm. 1133 */ 1134 void 1135 apic_ret() 1136 { 1137 } 1138 1139 /* 1140 * Call rebind to do the actual programming. 1141 */ 1142 int 1143 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1144 { 1145 apic_irq_t *irqptr; 1146 struct ioapic_reprogram_data *drep = NULL; 1147 int rv, cpu; 1148 cpuset_t cpus; 1149 1150 /* 1151 * Set cpu based on xen idea of online cpu's not apic tables. 1152 * Note that xen ignores/sets to it's own preferred value the 1153 * target cpu field when programming ioapic anyway. 1154 */ 1155 if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) { 1156 CPUSET_ZERO(cpus); 1157 CPUSET_OR(cpus, xen_psm_cpus_online); 1158 } else { 1159 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1160 } 1161 apic_irq_table[irq]->airq_cpu = cpu; 1162 if (deferred) { 1163 drep = (struct ioapic_reprogram_data *)p; 1164 ASSERT(drep != NULL); 1165 irqptr = drep->irqp; 1166 } else { 1167 irqptr = (apic_irq_t *)p; 1168 } 1169 ASSERT(irqptr != NULL); 1170 rv = apic_rebind(irqptr, cpu, drep); 1171 if (rv) { 1172 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1173 cpu = 0; 1174 rv = apic_rebind(irqptr, cpu, drep); 1175 } 1176 /* 1177 * If rebind successful bind the irq to an event channel 1178 */ 1179 if (rv == 0) { 1180 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1181 CPUSET_FIND(cpus, cpu); 1182 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1183 } 1184 return (rv); 1185 } 1186 1187 /* 1188 * Allocate a new vector for the given irq 1189 */ 1190 /* ARGSUSED */ 1191 uchar_t 1192 apic_modify_vector(uchar_t vector, int irq) 1193 { 1194 return (apic_allocate_vector(0, irq, 0)); 1195 } 1196 1197 /* 1198 * The rest of the file is just generic psm module boilerplate 1199 */ 1200 1201 static struct psm_ops xen_psm_ops = { 1202 xen_psm_probe, /* psm_probe */ 1203 1204 xen_psm_softinit, /* psm_init */ 1205 xen_psm_picinit, /* psm_picinit */ 1206 xen_psm_intr_enter, /* psm_intr_enter */ 1207 xen_psm_intr_exit, /* psm_intr_exit */ 1208 xen_psm_setspl, /* psm_setspl */ 1209 xen_psm_addspl, /* psm_addspl */ 1210 xen_psm_delspl, /* psm_delspl */ 1211 xen_psm_disable_intr, /* psm_disable_intr */ 1212 xen_psm_enable_intr, /* psm_enable_intr */ 1213 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1214 (void (*)(int))NULL, /* psm_set_softintr */ 1215 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1216 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1217 1218 xen_psm_clkinit, /* psm_clkinit */ 1219 xen_psm_get_clockirq, /* psm_get_clockirq */ 1220 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1221 xpv_gethrtime, /* psm_gethrtime */ 1222 1223 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1224 xen_psm_cpu_start, /* psm_cpu_start */ 1225 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1226 xen_psm_shutdown, /* psm_shutdown */ 1227 xen_psm_get_ipivect, /* psm_get_ipivect */ 1228 xen_psm_send_ipi, /* psm_send_ipi */ 1229 1230 xen_psm_translate_irq, /* psm_translate_irq */ 1231 1232 (void (*)(int, char *))NULL, /* psm_notify_error */ 1233 (void (*)(int msg))NULL, /* psm_notify_func */ 1234 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1235 xen_psm_timer_enable, /* psm_timer_enable */ 1236 xen_psm_timer_disable, /* psm_timer_disable */ 1237 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1238 (void (*)(int, int))NULL, /* psm_preshutdown */ 1239 xen_intr_ops /* Advanced DDI Interrupt framework */ 1240 }; 1241 1242 static struct psm_info xen_psm_info = { 1243 PSM_INFO_VER01_5, /* version */ 1244 PSM_OWN_EXCLUSIVE, /* ownership */ 1245 &xen_psm_ops, /* operation */ 1246 "xVM_psm", /* machine name */ 1247 "platform module" /* machine descriptions */ 1248 }; 1249 1250 static void *xen_psm_hdlp; 1251 1252 int 1253 _init(void) 1254 { 1255 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1256 } 1257 1258 int 1259 _fini(void) 1260 { 1261 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1262 } 1263 1264 int 1265 _info(struct modinfo *modinfop) 1266 { 1267 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1268 } 1269