/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#define PSMI_1_5

#include <sys/mutex.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/machlock.h>
#include <sys/smp_impldefs.h>
#include <sys/uadmin.h>
#include <sys/promif.h>
#include <sys/psm.h>
#include <sys/psm_common.h>
#include <sys/atomic.h>
#include <sys/apic.h>
#include <sys/archsystm.h>
#include <sys/mach_intr.h>
#include <sys/hypervisor.h>
#include <sys/evtchn_impl.h>
#include <sys/modctl.h>
#include <sys/trap.h>
#include <sys/panic.h>

#include <xen/public/vcpu.h>
#include <xen/public/physdev.h>


/*
 * Global Data
 */

int xen_psm_verbose = 0;

/* As of now we don't support x2apic in xVM */
volatile uint32_t *apicadr = NULL;      /* dummy, so common code will link */
int apic_error = 0;
int apic_verbose = 0;
cpuset_t apic_cpumask;
int apic_forceload = 0;
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
        3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
};
uchar_t apic_ipltopri[MAXIPL + 1];
uchar_t apic_ipls[APIC_AVAIL_VECTOR];
uint_t apic_picinit_called;
apic_cpus_info_t *apic_cpus;
int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
/* used to make sure only one cpu handles the nmi */
static lock_t xen_psm_nmi_lock;
int xen_psm_kmdb_on_nmi = 0;    /* 0 - no, 1 - yes enter kmdb */
int xen_psm_panic_on_nmi = 0;
int xen_psm_num_nmis = 0;

cpuset_t xen_psm_cpus_online;   /* online cpus */
int xen_psm_ncpus = 1;          /* cpu count */
int xen_psm_next_bind_cpu;      /* next cpu to bind an interrupt to */

/*
 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't
 * support MSI at all.  Change this initialization to zero when MSI is
 * supported.
 */
int xen_support_msi = -1;

static int xen_clock_irq = INVALID_IRQ;

/* flag definitions for xen_psm_verbose */
#define XEN_PSM_VERBOSE_IRQ_FLAG                0x00000001
#define XEN_PSM_VERBOSE_POWEROFF_FLAG           0x00000002
#define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG     0x00000004

#define XEN_PSM_VERBOSE_IRQ(fmt) \
        if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \
                cmn_err fmt;

#define XEN_PSM_VERBOSE_POWEROFF(fmt) \
        if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \
                prom_printf fmt;

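/*
 * Illustrative only: callers wrap the cmn_err/prom_printf argument list
 * in a second set of parentheses so the macros above can pass it through
 * verbatim, e.g.
 *
 *      XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d\n", irqno));
 */
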
/*
 * Dummy apic array to point common routines at that want to do some apic
 * manipulation.  Xen doesn't allow guest apic access, so we point at these
 * memory locations to fake out those who want to do apic fiddling.
 */
uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1];

static struct psm_info xen_psm_info;
static void xen_psm_setspl(int);

static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int);

/*
 * Local support routines
 */

/*
 * Select the vcpu to bind a xen virtual device interrupt to.
 */
/*ARGSUSED*/
int
xen_psm_bind_intr(int irq)
{
        int bind_cpu, test_cpu;
        apic_irq_t *irqptr;

        if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY)
                return (IRQ_UNBOUND);
        if (irq <= APIC_MAX_VECTOR)
                irqptr = apic_irq_table[irq];
        else
                irqptr = NULL;
        if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) {
                bind_cpu = irqptr->airq_cpu;
                test_cpu = bind_cpu & ~IRQ_USER_BOUND;
                if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu))
                        bind_cpu = 0;
                goto done;
        }
        if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
                do {
                        bind_cpu = xen_psm_next_bind_cpu++;
                        if (xen_psm_next_bind_cpu >= xen_psm_ncpus)
                                xen_psm_next_bind_cpu = 0;
                } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu));
        } else {
                bind_cpu = 0;
        }
done:
        return (bind_cpu);
}

/*
 * Autoconfiguration Routines
 */

static int
xen_psm_probe(void)
{
        int ret = PSM_SUCCESS;

        if (DOMAIN_IS_INITDOMAIN(xen_info))
                ret = apic_probe_common(xen_psm_info.p_mach_idstring);
        return (ret);
}

static void
xen_psm_softinit(void)
{
        /* LINTED logical expression always true: op "||" */
        ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t));
        CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0);
        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                apic_init_common();
        }
}

#define XEN_NSEC_PER_TICK       10 /* XXX - assume we have a 100 MHz clock */

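/*
 * Worked out: at the assumed 100 MHz clock, one tick is
 * 10^9 ns/sec / 10^8 ticks/sec = 10 ns, which is where the value above
 * comes from.
 */
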
/*ARGSUSED*/
static int
xen_psm_clkinit(int hertz)
{
        extern enum tod_fault_type tod_fault(enum tod_fault_type, int);
        extern int dosynctodr;

        /*
         * domU cannot set the TOD hardware; fault the TOD clock now to
         * indicate that, and turn off attempts to sync TOD hardware
         * with the hires timer.
         */
        if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
                mutex_enter(&tod_lock);
                (void) tod_fault(TOD_RDONLY, 0);
                dosynctodr = 0;
                mutex_exit(&tod_lock);
        }
        /*
         * The hypervisor provides a timer based on the local APIC timer.
         * The interface supports requests of nanosecond resolution.
         * A common frequency of the apic clock is 100 MHz, which
         * gives a resolution of 10 nsec per tick.  What we would really like
         * is a way to get the ns-per-tick value from xen.
         * XXPV - This is an assumption that needs checking and may change
         */
        return (XEN_NSEC_PER_TICK);
}

static void
xen_psm_hrtimeinit(void)
{
        extern int gethrtime_hires;
        gethrtime_hires = 1;
}

/* xen_psm NMI handler */
/*ARGSUSED*/
static void
xen_psm_nmi_intr(caddr_t arg, struct regs *rp)
{
        xen_psm_num_nmis++;

        if (!lock_try(&xen_psm_nmi_lock))
                return;

        if (xen_psm_kmdb_on_nmi && psm_debugger()) {
                debug_enter("NMI received: entering kmdb\n");
        } else if (xen_psm_panic_on_nmi) {
                /* Keep panic from entering kmdb. */
                nopanicdebug = 1;
                panic("NMI received\n");
        } else {
                /*
                 * prom_printf is the best shot we have of something that is
                 * problem free at high-level/NMI interrupt priority.
                 */
                prom_printf("NMI received\n");
        }

        lock_clear(&xen_psm_nmi_lock);
}

static void
xen_psm_picinit()
{
        int cpu, irqno;
        cpuset_t cpus;

        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                /* set a flag so we know we have run xen_psm_picinit() */
                apic_picinit_called = 1;
                LOCK_INIT_CLEAR(&apic_ioapic_lock);

                /* XXPV - do we need to do this? */
                picsetup();     /* initialise the 8259 */

                /* enable apic mode if imcr present */
                /* XXPV - do we need to do this either? */
                if (apic_imcrp) {
                        outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
                        outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
                }

                ioapic_init_intr(IOAPIC_NOMASK);
                /*
                 * We never called xen_psm_addspl() when the SCI
                 * interrupt was added because that happened before the
                 * PSM module was loaded.  Fix that up here by doing
                 * any missed operations (e.g. bind to CPU)
                 */
                if ((irqno = apic_sci_vect) > 0) {
                        if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
                                CPUSET_ZERO(cpus);
                                CPUSET_OR(cpus, xen_psm_cpus_online);
                        } else {
                                CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
                        }
                        ec_set_irq_affinity(irqno, cpus);
                        apic_irq_table[irqno]->airq_temp_cpu =
                            (uchar_t)(cpu & ~IRQ_USER_BOUND);
                        ec_enable_irq(irqno);
                }
        }

        /* add nmi handler - least priority nmi handler */
        LOCK_INIT_CLEAR(&xen_psm_nmi_lock);

        if (!psm_add_nmintr(0, (avfunc)xen_psm_nmi_intr,
            "xVM_psm NMI handler", (caddr_t)NULL))
                cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler");
}


/*
 * generates an interprocessor interrupt to another CPU
 */
static void
xen_psm_send_ipi(int cpun, int ipl)
{
        ulong_t flag = intr_clear();

        ec_send_ipi(ipl, cpun);
        intr_restore(flag);
}

/*ARGSUSED*/
static int
xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
        int cpu, ret;
        cpuset_t cpus;

        /*
         * We are called at splhi() so we can't call anything that might end
         * up trying to context switch.
         */
        if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
            DOMAIN_IS_INITDOMAIN(xen_info)) {
                /*
                 * Priority/affinity/enable for PIRQ's is set in
                 * ec_setup_pirq()
                 */
                ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
        } else {
                /*
                 * Set priority/affinity/enable for non PIRQs
                 */
                ret = ec_set_irq_priority(irqno, ipl);
                ASSERT(ret == 0);
                if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
                        CPUSET_ZERO(cpus);
                        CPUSET_OR(cpus, xen_psm_cpus_online);
                } else {
                        CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
                }
                ec_set_irq_affinity(irqno, cpus);
                ec_enable_irq(irqno);
        }
        return (ret);
}

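/*
 * Note on the binding pattern used above and repeated in
 * xen_psm_rebind_irq() and apic_setup_io_intr() below: when
 * xen_psm_bind_intr() returns IRQ_UNBOUND, the irq's event channel is
 * given affinity to every online cpu; otherwise it is bound to the single
 * cpu returned (with IRQ_USER_BOUND masked off).
 */
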
/*
 * Acquire ownership of this irq on this cpu
 */
void
xen_psm_acquire_irq(int irq)
{
        ulong_t flags;
        int cpuid;

        /*
         * If the irq is currently being serviced by another cpu
         * we busy-wait for the other cpu to finish.  Take any
         * pending interrupts before retrying.
         */
        do {
                flags = intr_clear();
                cpuid = ec_block_irq(irq);
                intr_restore(flags);
        } while (cpuid != CPU->cpu_id);
}

/*ARGSUSED*/
static int
xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
        apic_irq_t *irqptr;
        int err = PSM_SUCCESS;

        if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
            DOMAIN_IS_INITDOMAIN(xen_info)) {
                irqptr = apic_irq_table[irqno];
                /*
                 * unbind if no more sharers of this irq/evtchn
                 */
                if (irqptr->airq_share == 1) {
                        xen_psm_acquire_irq(irqno);
                        ec_unbind_irq(irqno);
                }
                err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);
                /*
                 * If still in use reset priority
                 */
                if (!err && irqptr->airq_share != 0) {
                        err = ec_set_irq_priority(irqno, max_ipl);
                        return (err);
                }
        } else {
                xen_psm_acquire_irq(irqno);
                ec_unbind_irq(irqno);
        }
        return (err);
}

static processorid_t
xen_psm_get_next_processorid(processorid_t id)
{
        if (id == -1)
                return (0);

        for (id++; id < NCPU; id++) {
                switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) {
                case 0:         /* yeah, that one's there */
                        return (id);
                default:
                case X_EINVAL:  /* out of range */
                        return (-1);
                case X_ENOENT:  /* not present in the domain */
                        /*
                         * It's not clear that we -need- to keep looking
                         * at this point; if, e.g., we can guarantee that
                         * the hypervisor always keeps a contiguous range
                         * of vcpus around, this is equivalent to "out of
                         * range".
                         *
                         * But it would be sad to miss a vcpu we're
                         * supposed to be using ..
                         */
                        break;
                }
        }

        return (-1);
}

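/*
 * Illustrative only: the expected calling pattern for the routine above is
 * an enumeration loop that starts from -1 and stops when -1 comes back,
 * e.g. (start_cpu() is a hypothetical caller):
 *
 *      for (id = xen_psm_get_next_processorid(-1); id != -1;
 *          id = xen_psm_get_next_processorid(id))
 *              (void) start_cpu(id);
 */
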
/*
 * XXPV - undo the start cpu op change; return to ignoring this value
 *      - also tweak error handling in main startup loop
 */
/*ARGSUSED*/
static int
xen_psm_cpu_start(processorid_t id, caddr_t arg)
{
        int ret;

        ASSERT(id > 0);
        CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id);
        ec_bind_cpu_ipis(id);
        (void) ec_bind_virq_to_irq(VIRQ_TIMER, id);
        if ((ret = xen_vcpu_up(id)) == 0)
                xen_psm_ncpus++;
        else
                ret = EINVAL;
        return (ret);
}

/*
 * Allocate an irq for inter cpu signaling
 */
/*ARGSUSED*/
static int
xen_psm_get_ipivect(int ipl, int type)
{
        return (ec_bind_ipi_to_irq(ipl, 0));
}

/*ARGSUSED*/
static int
xen_psm_get_clockirq(int ipl)
{
        if (xen_clock_irq != INVALID_IRQ)
                return (xen_clock_irq);

        xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0);
        return (xen_clock_irq);
}

/*ARGSUSED*/
static void
xen_psm_shutdown(int cmd, int fcn)
{
        XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn));

        switch (cmd) {
        case A_SHUTDOWN:
                switch (fcn) {
                case AD_BOOT:
                case AD_IBOOT:
                        (void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
                        break;
                case AD_POWEROFF:
                        /* fall through if domU or if poweroff fails */
                        if (DOMAIN_IS_INITDOMAIN(xen_info))
                                if (apic_enable_acpi)
                                        (void) acpi_poweroff();
                        /* FALLTHRU */
                case AD_HALT:
                default:
                        (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
                        break;
                }
                break;
        case A_REBOOT:
                (void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
                break;
        default:
                return;
        }
}


static int
xen_psm_translate_irq(dev_info_t *dip, int irqno)
{
        if (dip == NULL) {
                XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d"
                    " dip = NULL\n", irqno));
                return (irqno);
        }

        return (irqno);
}

/*
 * xen_psm_intr_enter() acks the event that triggered the interrupt and
 * returns the new priority level.
 */
/*ARGSUSED*/
static int
xen_psm_intr_enter(int ipl, int *vector)
{
        int newipl;
        uint_t intno;
        cpu_t *cpu = CPU;

        intno = (*vector);

        ASSERT(intno < NR_IRQS);
        ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);

        ec_clear_irq(intno);

        newipl = autovect[intno].avh_hi_pri;
        if (newipl == 0) {
                /*
                 * (newipl == 0) means we have no service routines for this
                 * vector.  We will treat this as a spurious interrupt.
                 * We have cleared the pending bit already; clear the event
                 * mask and return a spurious interrupt.  This case can happen
                 * when an interrupt delivery is racing with the removal of
                 * the service routine for that interrupt.
                 */
                ec_unmask_irq(intno);
                newipl = -1;    /* flag spurious interrupt */
        } else if (newipl <= cpu->cpu_pri) {
                /*
                 * (newipl <= cpu->cpu_pri) means that we must be trying to
                 * service a vector that was shared with a higher priority
                 * isr.  The higher priority handler has been removed and
                 * we need to service this int.  We can't return a lower
                 * priority than current cpu priority.  Just synthesize a
                 * priority to return that should be acceptable.
                 */
                newipl = cpu->cpu_pri + 1;      /* synthetic priority */
        }
        return (newipl);
}


/*
 * xen_psm_intr_exit() restores the old interrupt
 * priority level after processing an interrupt.
 * It is called with interrupts disabled, and does not enable interrupts.
 */
/* ARGSUSED */
static void
xen_psm_intr_exit(int ipl, int vector)
{
        ec_try_unmask_irq(vector);
        xen_psm_setspl(ipl);
}

intr_exit_fn_t
psm_intr_exit_fn(void)
{
        return (xen_psm_intr_exit);
}

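/*
 * Implementation note for xen_psm_setspl() below: the expression
 * (1 << (ipl + 1)) - 1 builds a mask of the priority bits at or below the
 * new ipl, so masking those off of mcpu_intr_pending leaves exactly the
 * pending levels above ipl, i.e. the interrupts that the lowered ipl now
 * allows to be delivered.
 */
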
/*
 * Check if new ipl level allows delivery of previously unserviced events
 */
static void
xen_psm_setspl(int ipl)
{
        struct cpu *cpu = CPU;
        volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
        uint16_t pending;

        ASSERT(vci->evtchn_upcall_mask != 0);

        /*
         * If new ipl level will enable any pending interrupts, setup so the
         * upcoming sti will cause us to get an upcall.
         */
        pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1);
        if (pending) {
                int i;
                ulong_t pending_sels = 0;
                volatile ulong_t *selp;
                struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;

                for (i = bsrw_insn(pending); i > ipl; i--)
                        pending_sels |= cpe->pending_sel[i];
                ASSERT(pending_sels);
                selp = (volatile ulong_t *)&vci->evtchn_pending_sel;
                atomic_or_ulong(selp, pending_sels);
                vci->evtchn_upcall_pending = 1;
        }
}

/*
 * This function provides external interface to the nexus for all
 * functionality related to the new DDI interrupt framework.
 *
 * Input:
 * dip     - pointer to the dev_info structure of the requested device
 * hdlp    - pointer to the internal interrupt handle structure for the
 *           requested interrupt
 * intr_op - opcode for this call
 * result  - pointer to the integer that will hold the result to be
 *           passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 */
int
xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
        int cap;
        int err;
        int new_priority;
        apic_irq_t *irqp;
        struct intrspec *ispec;

        DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p "
            "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));

        switch (intr_op) {
        case PSM_INTR_OP_CHECK_MSI:
                if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
                        *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
                            DDI_INTR_TYPE_MSIX);
                        break;
                }
                /*
                 * Check whether MSI/X is supported at the APIC level and,
                 * if it is not, mask off the MSI/X bits in hdlp->ih_type
                 * before returning.  If MSI/X is supported, leave ih_type
                 * unchanged and return.
                 *
                 * hdlp->ih_type passed in from the nexus has all the
                 * interrupt types supported by the device.
                 */
                if (xen_support_msi == 0) {
                        /*
                         * if xen_support_msi is not set, call
                         * apic_check_msi_support() to check whether msi
                         * is supported first
                         */
                        if (apic_check_msi_support() == PSM_SUCCESS)
                                xen_support_msi = 1;
                        else
                                xen_support_msi = -1;
                }
                if (xen_support_msi == 1)
                        *result = hdlp->ih_type;
                else
                        *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
                            DDI_INTR_TYPE_MSIX);
                break;
        case PSM_INTR_OP_ALLOC_VECTORS:
                *result = apic_alloc_vectors(dip, hdlp->ih_inum,
                    hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type,
                    (int)(uintptr_t)hdlp->ih_scratch2);
                break;
        case PSM_INTR_OP_FREE_VECTORS:
                apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
                    hdlp->ih_pri, hdlp->ih_type);
                break;
        case PSM_INTR_OP_NAVAIL_VECTORS:
                /*
                 * XXPV - maybe we should make this be:
                 * min(APIC_VECTOR_PER_IPL, count of all avail vectors);
                 */
                if (DOMAIN_IS_INITDOMAIN(xen_info))
                        *result = APIC_VECTOR_PER_IPL;
                else
                        *result = 1;
                break;
        case PSM_INTR_OP_XLATE_VECTOR:
                ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
                if (ispec->intrspec_vec >= PIRQ_BASE &&
                    ispec->intrspec_vec < NR_PIRQS &&
                    DOMAIN_IS_INITDOMAIN(xen_info)) {
                        *result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
                } else {
                        *result = ispec->intrspec_vec;
                }
                break;
        case PSM_INTR_OP_GET_PENDING:
                /* XXPV - is this enough for dom0 or do we need to ref ioapic */
                *result = ec_pending_irq(hdlp->ih_vector);
                break;
        case PSM_INTR_OP_CLEAR_MASK:
                /* XXPV - is this enough for dom0 or do we need to set ioapic */
                if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
                        return (PSM_FAILURE);
                ec_enable_irq(hdlp->ih_vector);
                break;
        case PSM_INTR_OP_SET_MASK:
                /* XXPV - is this enough for dom0 or do we need to set ioapic */
                if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
                        return (PSM_FAILURE);
                ec_disable_irq(hdlp->ih_vector);
                break;
        case PSM_INTR_OP_GET_CAP:
                cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE;
                if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
                        cap |= DDI_INTR_FLAG_MASKABLE;
                *result = cap;
                break;
        case PSM_INTR_OP_GET_SHARED:
                if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                        if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
                                return (PSM_FAILURE);
                        /* ih_private carries the ihdl_plat_t with the spec */
                        ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
                        if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type))
                            == NULL)
                                return (PSM_FAILURE);
                        *result = irqp->airq_share ? 1 : 0;
                } else {
                        return (PSM_FAILURE);
                }
                break;
        case PSM_INTR_OP_SET_PRI:
                new_priority = *(int *)result;
                err = ec_set_irq_priority(hdlp->ih_vector, new_priority);
                if (err != 0)
                        return (PSM_FAILURE);
                break;
        case PSM_INTR_OP_GET_INTR:
                if (!DOMAIN_IS_INITDOMAIN(xen_info))
                        return (PSM_FAILURE);
                /*
                 * The interrupt handle given here has been allocated
                 * specifically for this command, and ih_private carries
                 * a pointer to an apic_get_intr_t.
                 */
                if (apic_get_vector_intr_info(
                    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
                        return (PSM_FAILURE);
                break;
        case PSM_INTR_OP_SET_CAP:
                /* FALLTHRU */
        default:
                return (PSM_FAILURE);
        }
        return (PSM_SUCCESS);
}

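/*
 * Rebind an irq to a cpu chosen by the current interrupt policy; called
 * from xen_psm_disable_intr() and xen_psm_enable_intr() below as cpus are
 * taken offline or brought back online.
 */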
static void
xen_psm_rebind_irq(int irq)
{
        cpuset_t ncpu;
        processorid_t newcpu;
        apic_irq_t *irqptr;

        newcpu = xen_psm_bind_intr(irq);
        if (newcpu == IRQ_UNBOUND) {
                CPUSET_ZERO(ncpu);
                CPUSET_OR(ncpu, xen_psm_cpus_online);
        } else {
                CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND);
        }
        ec_set_irq_affinity(irq, ncpu);
        if (irq <= APIC_MAX_VECTOR) {
                irqptr = apic_irq_table[irq];
                ASSERT(irqptr != NULL);
                irqptr->airq_temp_cpu = (uchar_t)newcpu;
        }
}

/*
 * Disable all device interrupts for the given cpu.
 * High priority interrupts are not disabled and will still be serviced.
 */
static int
xen_psm_disable_intr(processorid_t cpun)
{
        int irq;

        /*
         * Can't offline VCPU 0 on this hypervisor.  There's no reason
         * anyone would want to, given that the CPUs are virtual.  Also note
         * that the hypervisor requires suspend/resume to be on VCPU 0.
         */
        if (cpun == 0)
                return (PSM_FAILURE);

        CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun);
        for (irq = 0; irq < NR_IRQS; irq++) {
                if (!ec_irq_needs_rebind(irq, cpun))
                        continue;
                xen_psm_rebind_irq(irq);
        }
        return (PSM_SUCCESS);
}

static void
xen_psm_enable_intr(processorid_t cpun)
{
        int irq;

        if (cpun == 0)
                return;

        CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun);

        /*
         * Rebalance device interrupts among online processors
         */
        for (irq = 0; irq < NR_IRQS; irq++) {
                if (!ec_irq_rebindable(irq))
                        continue;
                xen_psm_rebind_irq(irq);
        }

        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
        }
}

static int
xen_psm_post_cpu_start()
{
        processorid_t cpun;

        cpun = psm_get_cpu_id();
        if (DOMAIN_IS_INITDOMAIN(xen_info)) {
                /*
                 * Non-virtualized environments can call psm_post_cpu_start
                 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set.
                 * xen_psm_post_cpu_start() is only called from boot.
                 */
                apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
        }
        return (PSM_SUCCESS);
}

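/*
 * Worked example for the conversion done by xen_psm_timer_reprogram()
 * below (values illustrative only): if timer_req is 5 ms past the current
 * hrtime, time_delta is 5000000 ns and the hypervisor one-shot timer is
 * armed for xen_time + 5000000.  A timer_req already in the past is
 * clamped to XEN_NSEC_PER_TICK so the interrupt fires as soon as possible.
 */
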
/*
 * This function will reprogram the timer.
 *
 * When in oneshot mode the argument is the absolute time in future at which to
 * generate the interrupt.
 *
 * When in periodic mode, the argument is the interval at which the
 * interrupts should be generated.  There is no need to support the periodic
 * mode timer change at this time.
 *
 * Note that we must be careful to convert from hrtime to Xen system time (see
 * xpv_timestamp.c).
 */
static void
xen_psm_timer_reprogram(hrtime_t timer_req)
{
        hrtime_t now, timer_new, time_delta, xen_time;
        ulong_t flags;

        flags = intr_clear();
        /*
         * We should be called from high PIL context (CBE_HIGH_PIL),
         * so kpreempt is disabled.
         */

        now = xpv_gethrtime();
        xen_time = xpv_getsystime();
        if (timer_req <= now) {
                /*
                 * requested to generate an interrupt in the past
                 * generate an interrupt as soon as possible
                 */
                time_delta = XEN_NSEC_PER_TICK;
        } else
                time_delta = timer_req - now;

        timer_new = xen_time + time_delta;
        if (HYPERVISOR_set_timer_op(timer_new) != 0)
                panic("can't set hypervisor timer?");
        intr_restore(flags);
}

/*
 * This function will enable timer interrupts.
 */
static void
xen_psm_timer_enable(void)
{
        ec_unmask_irq(xen_clock_irq);
}

/*
 * This function will disable timer interrupts on the current cpu.
 */
static void
xen_psm_timer_disable(void)
{
        (void) ec_block_irq(xen_clock_irq);
        /*
         * If the clock irq is pending on this cpu then we need to
         * clear the pending interrupt.
         */
        ec_unpend_irq(xen_clock_irq);
}

/*
 * The following functions are in the platform specific file so that they
 * can be different functions depending on whether we are running on
 * bare metal or a hypervisor.
 */

/*
 * Allocate a free vector for irq at ipl.
 */
/* ARGSUSED */
uchar_t
apic_allocate_vector(int ipl, int irq, int pri)
{
        physdev_irq_t irq_op;
        uchar_t vector;

        irq_op.irq = irq;

        if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
                panic("Hypervisor alloc vector failed");
        vector = irq_op.vector;
        /*
         * No need to worry about vector colliding with our reserved vectors
         * e.g. T_FASTTRAP, xen can differentiate between hardware and software
         * generated traps and handle them properly.
         */
        apic_vector_to_irq[vector] = (uchar_t)irq;
        return (vector);
}

/* Mark vector as not being used by any irq */
void
apic_free_vector(uchar_t vector)
{
        apic_vector_to_irq[vector] = APIC_RESV_IRQ;
}

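/*
 * Unlike the bare-metal pcplusmp code, the vector space here is owned by
 * the hypervisor: apic_allocate_vector() above obtains a vector via the
 * PHYSDEVOP_alloc_irq_vector hypercall, so allocation and freeing reduce
 * to maintaining the apic_vector_to_irq[] mapping.
 */
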
/*
 * This function allocates "count" vector(s) for the given "dip/pri/type"
 */
static int
apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type,
    int behavior)
{
        int rcount, i;
        uchar_t vector, cpu;
        int irqno;
        major_t major;
        apic_irq_t *irqptr;

        /* only supports MSI at the moment, will add MSI-X support later */
        if (type != DDI_INTR_TYPE_MSI)
                return (0);

        DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d "
            "inum=0x%x pri=0x%x count=0x%x behavior=%d\n",
            (void *)dip, type, inum, pri, count, behavior));

        if (count > 1) {
                if (behavior == DDI_INTR_ALLOC_STRICT &&
                    (apic_multi_msi_enable == 0 || count > apic_multi_msi_max))
                        return (0);

                if (apic_multi_msi_enable == 0)
                        count = 1;
                else if (count > apic_multi_msi_max)
                        count = apic_multi_msi_max;
        }

        /*
         * XXPV - metal version takes all vectors avail at given pri.
         * Why do that?  For now just allocate count vectors.
         */
        rcount = count;

        mutex_enter(&airq_mutex);

        /*
         * XXPV - currently the hypervisor does not support MSI at all.
         * It doesn't return consecutive vectors.  This code is a first
         * cut for the (future) time that MSI is supported.
         */
        major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
        for (i = 0; i < rcount; i++) {
                if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
                    INVALID_IRQ) {
                        mutex_exit(&airq_mutex);
                        DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: "
                            "apic_allocate_irq failed\n"));
                        return (i);
                }
                apic_max_device_irq = max(irqno, apic_max_device_irq);
                apic_min_device_irq = min(irqno, apic_min_device_irq);
                irqptr = apic_irq_table[irqno];
                vector = apic_allocate_vector(pri, irqno, 0);
                apic_vector_to_irq[vector] = (uchar_t)irqno;
#ifdef DEBUG
                if (apic_vector_to_irq[vector] != APIC_RESV_IRQ)
                        DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: "
                            "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
#endif

                irqptr->airq_vector = vector;
                irqptr->airq_ioapicindex = (uchar_t)inum;       /* start */
                irqptr->airq_intin_no = (uchar_t)rcount;
                irqptr->airq_ipl = pri;
                irqptr->airq_origirq = (uchar_t)(inum + i);
                irqptr->airq_share_id = 0;
                irqptr->airq_mps_intr_index = MSI_INDEX;
                irqptr->airq_dip = dip;
                irqptr->airq_major = major;
                if (i == 0)     /* they all bind to the same cpu */
                        cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
                            0xff, 0xff);
                else
                        irqptr->airq_cpu = cpu;
                DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x "
                    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
                    (void *)irqptr->airq_dip, irqptr->airq_vector,
                    irqptr->airq_origirq, pri));
        }
        mutex_exit(&airq_mutex);
        return (rcount);
}

/*
 * The hypervisor doesn't permit access to local apics directly
 */
/* ARGSUSED */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
        /*
         * Return a pointer to a memory area to fake out the
         * probe code that wants to read apic registers.
         * The dummy values will end up being ignored by xen
         * later on when they are used anyway.
         */
        xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS;
        return (xen_psm_dummy_apic);
}

/* ARGSUSED */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
        /*
         * Return non-null here to fake out configure code that calls this.
         * The i86xpv platform will not reference through the returned value.
         */
        return ((uint32_t *)0x1);
}

/* ARGSUSED */
void
mapout_apic(caddr_t addr, size_t len)
{
}

/* ARGSUSED */
void
mapout_ioapic(caddr_t addr, size_t len)
{
}

uint32_t
ioapic_read(int apic_ix, uint32_t reg)
{
        physdev_apic_t apic;

        apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
        apic.reg = reg;
        if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic))
                panic("read ioapic %d reg %d failed", apic_ix, reg);
        return (apic.value);
}

void
ioapic_write(int apic_ix, uint32_t reg, uint32_t value)
{
        physdev_apic_t apic;

        apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
        apic.reg = reg;
        apic.value = value;
        if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
                panic("write ioapic %d reg %d failed", apic_ix, reg);
}

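/*
 * I/O apic registers are likewise reached through the hypervisor:
 * ioapic_read() and ioapic_write() above issue PHYSDEVOP_apic_read/write
 * hypercalls keyed by the ioapic's physical base address rather than
 * touching a mapped register window.
 */
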
/*
 * This function was added as part of x2APIC support in pcplusmp.
 */
void
ioapic_write_eoi(int apic_ix, uint32_t value)
{
        physdev_apic_t apic;

        apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
        apic.reg = APIC_IO_EOI;
        apic.value = value;
        if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
                panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix);
}

/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
x2apic_update_psm()
{
}

/*
 * This function was added as part of x2APIC support in pcplusmp to resolve
 * undefined symbol in xpv_psm.
 */
void
apic_ret()
{
}

/*
 * Call rebind to do the actual programming.
 */
int
apic_setup_io_intr(void *p, int irq, boolean_t deferred)
{
        apic_irq_t *irqptr;
        struct ioapic_reprogram_data *drep = NULL;
        int rv, cpu;
        cpuset_t cpus;

        /*
         * Set cpu based on xen's idea of online cpus, not apic tables.
         * Note that xen ignores/sets to its own preferred value the
         * target cpu field when programming the ioapic anyway.
         */
        if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) {
                CPUSET_ZERO(cpus);
                CPUSET_OR(cpus, xen_psm_cpus_online);
        } else {
                CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
        }
        apic_irq_table[irq]->airq_cpu = cpu;
        if (deferred) {
                drep = (struct ioapic_reprogram_data *)p;
                ASSERT(drep != NULL);
                irqptr = drep->irqp;
        } else {
                irqptr = (apic_irq_t *)p;
        }
        ASSERT(irqptr != NULL);
        rv = apic_rebind(irqptr, cpu, drep);
        if (rv) {
                /* CPU is not up or interrupt is disabled.  Fall back to 0 */
                cpu = 0;
                rv = apic_rebind(irqptr, cpu, drep);
        }
        /*
         * If the rebind was successful, bind the irq to an event channel
         */
        if (rv == 0) {
                ec_setup_pirq(irq, irqptr->airq_ipl, &cpus);
                CPUSET_FIND(cpus, cpu);
                apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND;
        }
        return (rv);
}

/*
 * Allocate a new vector for the given irq
 */
/* ARGSUSED */
uchar_t
apic_modify_vector(uchar_t vector, int irq)
{
        return (apic_allocate_vector(0, irq, 0));
}

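/*
 * Entries left NULL in the ops vector below are optional PSM callbacks
 * (soft interrupt, idle-cpu, notify and cyclic-setup hooks) that this
 * module does not provide.
 */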
/*
 * The rest of the file is just generic psm module boilerplate
 */

static struct psm_ops xen_psm_ops = {
        xen_psm_probe,                          /* psm_probe */

        xen_psm_softinit,                       /* psm_init */
        xen_psm_picinit,                        /* psm_picinit */
        xen_psm_intr_enter,                     /* psm_intr_enter */
        xen_psm_intr_exit,                      /* psm_intr_exit */
        xen_psm_setspl,                         /* psm_setspl */
        xen_psm_addspl,                         /* psm_addspl */
        xen_psm_delspl,                         /* psm_delspl */
        xen_psm_disable_intr,                   /* psm_disable_intr */
        xen_psm_enable_intr,                    /* psm_enable_intr */
        (int (*)(int))NULL,                     /* psm_softlvl_to_irq */
        (void (*)(int))NULL,                    /* psm_set_softintr */
        (void (*)(processorid_t))NULL,          /* psm_set_idlecpu */
        (void (*)(processorid_t))NULL,          /* psm_unset_idlecpu */

        xen_psm_clkinit,                        /* psm_clkinit */
        xen_psm_get_clockirq,                   /* psm_get_clockirq */
        xen_psm_hrtimeinit,                     /* psm_hrtimeinit */
        xpv_gethrtime,                          /* psm_gethrtime */

        xen_psm_get_next_processorid,           /* psm_get_next_processorid */
        xen_psm_cpu_start,                      /* psm_cpu_start */
        xen_psm_post_cpu_start,                 /* psm_post_cpu_start */
        xen_psm_shutdown,                       /* psm_shutdown */
        xen_psm_get_ipivect,                    /* psm_get_ipivect */
        xen_psm_send_ipi,                       /* psm_send_ipi */

        xen_psm_translate_irq,                  /* psm_translate_irq */

        (void (*)(int, char *))NULL,            /* psm_notify_error */
        (void (*)(int msg))NULL,                /* psm_notify_func */
        xen_psm_timer_reprogram,                /* psm_timer_reprogram */
        xen_psm_timer_enable,                   /* psm_timer_enable */
        xen_psm_timer_disable,                  /* psm_timer_disable */
        (void (*)(void *arg))NULL,              /* psm_post_cyclic_setup */
        (void (*)(int, int))NULL,               /* psm_preshutdown */
        xen_intr_ops                    /* Advanced DDI Interrupt framework */
};

static struct psm_info xen_psm_info = {
        PSM_INFO_VER01_5,       /* version */
        PSM_OWN_EXCLUSIVE,      /* ownership */
        &xen_psm_ops,           /* operation */
        "xVM_psm",              /* machine name */
        "platform module %I%"   /* machine descriptions */
};

static void *xen_psm_hdlp;

int
_init(void)
{
        return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info));
}

int
_fini(void)
{
        return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info));
}

int
_info(struct modinfo *modinfop)
{
        return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop));
}