1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #define PSMI_1_5 30 31 #include <sys/mutex.h> 32 #include <sys/types.h> 33 #include <sys/time.h> 34 #include <sys/clock.h> 35 #include <sys/machlock.h> 36 #include <sys/smp_impldefs.h> 37 #include <sys/uadmin.h> 38 #include <sys/promif.h> 39 #include <sys/psm.h> 40 #include <sys/psm_common.h> 41 #include <sys/atomic.h> 42 #include <sys/apic.h> 43 #include <sys/archsystm.h> 44 #include <sys/mach_intr.h> 45 #include <sys/hypervisor.h> 46 #include <sys/evtchn_impl.h> 47 #include <sys/modctl.h> 48 #include <sys/trap.h> 49 #include <sys/panic.h> 50 51 #include <xen/public/vcpu.h> 52 #include <xen/public/physdev.h> 53 54 55 /* 56 * Global Data 57 */ 58 59 int xen_psm_verbose = 0; 60 61 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 62 int apic_error = 0; 63 int apic_verbose = 0; 64 cpuset_t apic_cpumask; 65 int apic_forceload = 0; 66 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 67 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 68 }; 69 uchar_t apic_ipltopri[MAXIPL + 1]; 70 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 71 uint_t apic_picinit_called; 72 apic_cpus_info_t *apic_cpus; 73 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 74 /* use to make sure only one cpu handles the nmi */ 75 static lock_t xen_psm_nmi_lock; 76 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 77 int xen_psm_panic_on_nmi = 0; 78 int xen_psm_num_nmis = 0; 79 80 cpuset_t xen_psm_cpus_online; /* online cpus */ 81 int xen_psm_ncpus = 1; /* cpu count */ 82 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 83 84 /* 85 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't 86 * support MSI at all. Change this initialization to zero when MSI is 87 * supported. 88 */ 89 int xen_support_msi = -1; 90 91 static int xen_clock_irq = INVALID_IRQ; 92 93 /* flag definitions for xen_psm_verbose */ 94 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 95 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 96 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 97 98 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 99 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 100 cmn_err fmt; 101 102 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 103 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 104 prom_printf fmt; 105 106 /* 107 * Dummy apic array to point common routines at that want to do some apic 108 * manipulation. Xen doesn't allow guest apic access so we point at these 109 * memory locations to fake out those who want to do apic fiddling. 110 */ 111 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 112 113 static struct psm_info xen_psm_info; 114 static void xen_psm_setspl(int); 115 116 static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int); 117 118 /* 119 * Local support routines 120 */ 121 122 /* 123 * Select vcpu to bind xen virtual device interrupt to. 124 */ 125 /*ARGSUSED*/ 126 int 127 xen_psm_bind_intr(int irq) 128 { 129 int bind_cpu, test_cpu; 130 apic_irq_t *irqptr; 131 132 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 133 return (IRQ_UNBOUND); 134 if (irq <= APIC_MAX_VECTOR) 135 irqptr = apic_irq_table[irq]; 136 else 137 irqptr = NULL; 138 if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) { 139 bind_cpu = irqptr->airq_cpu; 140 test_cpu = bind_cpu & ~IRQ_USER_BOUND; 141 if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu)) 142 bind_cpu = 0; 143 goto done; 144 } 145 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 146 do { 147 bind_cpu = xen_psm_next_bind_cpu++; 148 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 149 xen_psm_next_bind_cpu = 0; 150 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 151 } else { 152 bind_cpu = 0; 153 } 154 done: 155 return (bind_cpu); 156 } 157 158 /* 159 * Autoconfiguration Routines 160 */ 161 162 static int 163 xen_psm_probe(void) 164 { 165 int ret = PSM_SUCCESS; 166 167 if (DOMAIN_IS_INITDOMAIN(xen_info)) 168 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 169 return (ret); 170 } 171 172 static void 173 xen_psm_softinit(void) 174 { 175 /* LINTED logical expression always true: op "||" */ 176 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 177 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 178 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 179 apic_init_common(); 180 } 181 } 182 183 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 184 185 /*ARGSUSED*/ 186 static int 187 xen_psm_clkinit(int hertz) 188 { 189 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 190 extern int dosynctodr; 191 192 /* 193 * domU cannot set the TOD hardware, fault the TOD clock now to 194 * indicate that and turn off attempts to sync TOD hardware 195 * with the hires timer. 196 */ 197 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 198 mutex_enter(&tod_lock); 199 (void) tod_fault(TOD_RDONLY, 0); 200 dosynctodr = 0; 201 mutex_exit(&tod_lock); 202 } 203 /* 204 * The hypervisor provides a timer based on the local APIC timer. 205 * The interface supports requests of nanosecond resolution. 206 * A common frequency of the apic clock is 100 Mhz which 207 * gives a resolution of 10 nsec per tick. What we would really like 208 * is a way to get the ns per tick value from xen. 209 * XXPV - This is an assumption that needs checking and may change 210 */ 211 return (XEN_NSEC_PER_TICK); 212 } 213 214 static void 215 xen_psm_hrtimeinit(void) 216 { 217 extern int gethrtime_hires; 218 gethrtime_hires = 1; 219 } 220 221 /* xen_psm NMI handler */ 222 /*ARGSUSED*/ 223 static void 224 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 225 { 226 xen_psm_num_nmis++; 227 228 if (!lock_try(&xen_psm_nmi_lock)) 229 return; 230 231 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 232 debug_enter("NMI received: entering kmdb\n"); 233 } else if (xen_psm_panic_on_nmi) { 234 /* Keep panic from entering kmdb. */ 235 nopanicdebug = 1; 236 panic("NMI received\n"); 237 } else { 238 /* 239 * prom_printf is the best shot we have of something which is 240 * problem free from high level/NMI type of interrupts 241 */ 242 prom_printf("NMI received\n"); 243 } 244 245 lock_clear(&xen_psm_nmi_lock); 246 } 247 248 static void 249 xen_psm_picinit() 250 { 251 int cpu, irqno; 252 cpuset_t cpus; 253 254 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 255 /* set a flag so we know we have run xen_psm_picinit() */ 256 apic_picinit_called = 1; 257 LOCK_INIT_CLEAR(&apic_ioapic_lock); 258 259 /* XXPV - do we need to do this? */ 260 picsetup(); /* initialise the 8259 */ 261 262 /* enable apic mode if imcr present */ 263 /* XXPV - do we need to do this either? */ 264 if (apic_imcrp) { 265 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 266 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 267 } 268 269 ioapic_init_intr(IOAPIC_NOMASK); 270 /* 271 * We never called xen_psm_addspl() when the SCI 272 * interrupt was added because that happened before the 273 * PSM module was loaded. Fix that up here by doing 274 * any missed operations (e.g. bind to CPU) 275 */ 276 if ((irqno = apic_sci_vect) > 0) { 277 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 278 CPUSET_ZERO(cpus); 279 CPUSET_OR(cpus, xen_psm_cpus_online); 280 } else { 281 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 282 } 283 ec_set_irq_affinity(irqno, cpus); 284 apic_irq_table[irqno]->airq_temp_cpu = 285 (uchar_t)(cpu & ~IRQ_USER_BOUND); 286 ec_enable_irq(irqno); 287 } 288 } 289 290 /* add nmi handler - least priority nmi handler */ 291 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 292 293 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 294 "xVM_psm NMI handler", (caddr_t)NULL)) 295 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 296 } 297 298 299 /* 300 * generates an interprocessor interrupt to another CPU 301 */ 302 static void 303 xen_psm_send_ipi(int cpun, int ipl) 304 { 305 ulong_t flag = intr_clear(); 306 307 ec_send_ipi(ipl, cpun); 308 intr_restore(flag); 309 } 310 311 /*ARGSUSED*/ 312 static int 313 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 314 { 315 int cpu, ret; 316 cpuset_t cpus; 317 318 /* 319 * We are called at splhi() so we can't call anything that might end 320 * up trying to context switch. 321 */ 322 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 323 DOMAIN_IS_INITDOMAIN(xen_info)) { 324 /* 325 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 326 */ 327 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 328 } else { 329 /* 330 * Set priority/affinity/enable for non PIRQs 331 */ 332 ret = ec_set_irq_priority(irqno, ipl); 333 ASSERT(ret == 0); 334 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 335 CPUSET_ZERO(cpus); 336 CPUSET_OR(cpus, xen_psm_cpus_online); 337 } else { 338 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 339 } 340 ec_set_irq_affinity(irqno, cpus); 341 ec_enable_irq(irqno); 342 } 343 return (ret); 344 } 345 346 /* 347 * Acquire ownership of this irq on this cpu 348 */ 349 void 350 xen_psm_acquire_irq(int irq) 351 { 352 ulong_t flags; 353 int cpuid; 354 355 /* 356 * If the irq is currently being serviced by another cpu 357 * we busy-wait for the other cpu to finish. Take any 358 * pending interrupts before retrying. 359 */ 360 do { 361 flags = intr_clear(); 362 cpuid = ec_block_irq(irq); 363 intr_restore(flags); 364 } while (cpuid != CPU->cpu_id); 365 } 366 367 /*ARGSUSED*/ 368 static int 369 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 370 { 371 apic_irq_t *irqptr; 372 int err = PSM_SUCCESS; 373 374 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 375 DOMAIN_IS_INITDOMAIN(xen_info)) { 376 irqptr = apic_irq_table[irqno]; 377 /* 378 * unbind if no more sharers of this irq/evtchn 379 */ 380 if (irqptr->airq_share == 1) { 381 xen_psm_acquire_irq(irqno); 382 ec_unbind_irq(irqno); 383 } 384 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 385 /* 386 * If still in use reset priority 387 */ 388 if (!err && irqptr->airq_share != 0) { 389 err = ec_set_irq_priority(irqno, max_ipl); 390 return (err); 391 } 392 } else { 393 xen_psm_acquire_irq(irqno); 394 ec_unbind_irq(irqno); 395 } 396 return (err); 397 } 398 399 static processorid_t 400 xen_psm_get_next_processorid(processorid_t id) 401 { 402 if (id == -1) 403 return (0); 404 405 for (id++; id < NCPU; id++) { 406 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 407 case 0: /* yeah, that one's there */ 408 return (id); 409 default: 410 case X_EINVAL: /* out of range */ 411 return (-1); 412 case X_ENOENT: /* not present in the domain */ 413 /* 414 * It's not clear that we -need- to keep looking 415 * at this point, if, e.g., we can guarantee 416 * the hypervisor always keeps a contiguous range 417 * of vcpus around this is equivalent to "out of range". 418 * 419 * But it would be sad to miss a vcpu we're 420 * supposed to be using .. 421 */ 422 break; 423 } 424 } 425 426 return (-1); 427 } 428 429 /* 430 * XXPV - undo the start cpu op change; return to ignoring this value 431 * - also tweak error handling in main startup loop 432 */ 433 /*ARGSUSED*/ 434 static int 435 xen_psm_cpu_start(processorid_t id, caddr_t arg) 436 { 437 int ret; 438 439 ASSERT(id > 0); 440 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 441 ec_bind_cpu_ipis(id); 442 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 443 if ((ret = xen_vcpu_up(id)) == 0) 444 xen_psm_ncpus++; 445 else 446 ret = EINVAL; 447 return (ret); 448 } 449 450 /* 451 * Allocate an irq for inter cpu signaling 452 */ 453 /*ARGSUSED*/ 454 static int 455 xen_psm_get_ipivect(int ipl, int type) 456 { 457 return (ec_bind_ipi_to_irq(ipl, 0)); 458 } 459 460 /*ARGSUSED*/ 461 static int 462 xen_psm_get_clockirq(int ipl) 463 { 464 if (xen_clock_irq != INVALID_IRQ) 465 return (xen_clock_irq); 466 467 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 468 return (xen_clock_irq); 469 } 470 471 /*ARGSUSED*/ 472 static void 473 xen_psm_shutdown(int cmd, int fcn) 474 { 475 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 476 477 switch (cmd) { 478 case A_SHUTDOWN: 479 switch (fcn) { 480 case AD_BOOT: 481 case AD_IBOOT: 482 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 483 break; 484 case AD_POWEROFF: 485 /* fall through if domU or if poweroff fails */ 486 if (DOMAIN_IS_INITDOMAIN(xen_info)) 487 if (apic_enable_acpi) 488 (void) acpi_poweroff(); 489 /* FALLTHRU */ 490 case AD_HALT: 491 default: 492 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 493 break; 494 } 495 break; 496 case A_REBOOT: 497 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 498 break; 499 default: 500 return; 501 } 502 } 503 504 505 static int 506 xen_psm_translate_irq(dev_info_t *dip, int irqno) 507 { 508 if (dip == NULL) { 509 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 510 " dip = NULL\n", irqno)); 511 return (irqno); 512 } 513 return (irqno); 514 } 515 516 /* 517 * xen_psm_intr_enter() acks the event that triggered the interrupt and 518 * returns the new priority level, 519 */ 520 /*ARGSUSED*/ 521 static int 522 xen_psm_intr_enter(int ipl, int *vector) 523 { 524 int newipl; 525 uint_t intno; 526 cpu_t *cpu = CPU; 527 528 intno = (*vector); 529 530 ASSERT(intno < NR_IRQS); 531 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 532 533 ec_clear_irq(intno); 534 535 newipl = autovect[intno].avh_hi_pri; 536 if (newipl == 0) { 537 /* 538 * (newipl == 0) means we have no service routines for this 539 * vector. We will treat this as a spurious interrupt. 540 * We have cleared the pending bit already, clear the event 541 * mask and return a spurious interrupt. This case can happen 542 * when an interrupt delivery is racing with the removal of 543 * of the service routine for that interrupt. 544 */ 545 ec_unmask_irq(intno); 546 newipl = -1; /* flag spurious interrupt */ 547 } else if (newipl <= cpu->cpu_pri) { 548 /* 549 * (newipl <= cpu->cpu_pri) means that we must be trying to 550 * service a vector that was shared with a higher priority 551 * isr. The higher priority handler has been removed and 552 * we need to service this int. We can't return a lower 553 * priority than current cpu priority. Just synthesize a 554 * priority to return that should be acceptable. 555 */ 556 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 557 } 558 return (newipl); 559 } 560 561 562 /* 563 * xen_psm_intr_exit() restores the old interrupt 564 * priority level after processing an interrupt. 565 * It is called with interrupts disabled, and does not enable interrupts. 566 */ 567 /* ARGSUSED */ 568 static void 569 xen_psm_intr_exit(int ipl, int vector) 570 { 571 ec_try_unmask_irq(vector); 572 xen_psm_setspl(ipl); 573 } 574 575 intr_exit_fn_t 576 psm_intr_exit_fn(void) 577 { 578 return (xen_psm_intr_exit); 579 } 580 581 /* 582 * Check if new ipl level allows delivery of previously unserviced events 583 */ 584 static void 585 xen_psm_setspl(int ipl) 586 { 587 struct cpu *cpu = CPU; 588 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 589 uint16_t pending; 590 591 ASSERT(vci->evtchn_upcall_mask != 0); 592 593 /* 594 * If new ipl level will enable any pending interrupts, setup so the 595 * upcoming sti will cause us to get an upcall. 596 */ 597 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 598 if (pending) { 599 int i; 600 ulong_t pending_sels = 0; 601 volatile ulong_t *selp; 602 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 603 604 for (i = bsrw_insn(pending); i > ipl; i--) 605 pending_sels |= cpe->pending_sel[i]; 606 ASSERT(pending_sels); 607 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 608 atomic_or_ulong(selp, pending_sels); 609 vci->evtchn_upcall_pending = 1; 610 } 611 } 612 613 /* 614 * This function provides external interface to the nexus for all 615 * functionality related to the new DDI interrupt framework. 616 * 617 * Input: 618 * dip - pointer to the dev_info structure of the requested device 619 * hdlp - pointer to the internal interrupt handle structure for the 620 * requested interrupt 621 * intr_op - opcode for this call 622 * result - pointer to the integer that will hold the result to be 623 * passed back if return value is PSM_SUCCESS 624 * 625 * Output: 626 * return value is either PSM_SUCCESS or PSM_FAILURE 627 */ 628 int 629 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 630 psm_intr_op_t intr_op, int *result) 631 { 632 int cap; 633 int err; 634 int new_priority; 635 apic_irq_t *irqp; 636 struct intrspec *ispec; 637 638 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 639 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 640 641 switch (intr_op) { 642 case PSM_INTR_OP_CHECK_MSI: 643 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 644 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 645 DDI_INTR_TYPE_MSIX); 646 break; 647 } 648 /* 649 * Check MSI/X is supported or not at APIC level and 650 * masked off the MSI/X bits in hdlp->ih_type if not 651 * supported before return. If MSI/X is supported, 652 * leave the ih_type unchanged and return. 653 * 654 * hdlp->ih_type passed in from the nexus has all the 655 * interrupt types supported by the device. 656 */ 657 if (xen_support_msi == 0) { 658 /* 659 * if xen_support_msi is not set, call 660 * apic_check_msi_support() to check whether msi 661 * is supported first 662 */ 663 if (apic_check_msi_support() == PSM_SUCCESS) 664 xen_support_msi = 1; 665 else 666 xen_support_msi = -1; 667 } 668 if (xen_support_msi == 1) 669 *result = hdlp->ih_type; 670 else 671 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 672 DDI_INTR_TYPE_MSIX); 673 break; 674 case PSM_INTR_OP_ALLOC_VECTORS: 675 *result = apic_alloc_vectors(dip, hdlp->ih_inum, 676 hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type, 677 (int)(uintptr_t)hdlp->ih_scratch2); 678 break; 679 case PSM_INTR_OP_FREE_VECTORS: 680 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 681 hdlp->ih_pri, hdlp->ih_type); 682 break; 683 case PSM_INTR_OP_NAVAIL_VECTORS: 684 /* 685 * XXPV - maybe we should make this be: 686 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 687 */ 688 if (DOMAIN_IS_INITDOMAIN(xen_info)) 689 *result = APIC_VECTOR_PER_IPL; 690 else 691 *result = 1; 692 break; 693 case PSM_INTR_OP_XLATE_VECTOR: 694 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 695 if (ispec->intrspec_vec >= PIRQ_BASE && 696 ispec->intrspec_vec < NR_PIRQS && 697 DOMAIN_IS_INITDOMAIN(xen_info)) { 698 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 699 } else { 700 *result = ispec->intrspec_vec; 701 } 702 break; 703 case PSM_INTR_OP_GET_PENDING: 704 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 705 *result = ec_pending_irq(hdlp->ih_vector); 706 break; 707 case PSM_INTR_OP_CLEAR_MASK: 708 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 709 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 710 return (PSM_FAILURE); 711 ec_enable_irq(hdlp->ih_vector); 712 break; 713 case PSM_INTR_OP_SET_MASK: 714 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 715 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 716 return (PSM_FAILURE); 717 ec_disable_irq(hdlp->ih_vector); 718 break; 719 case PSM_INTR_OP_GET_CAP: 720 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 721 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 722 cap |= DDI_INTR_FLAG_MASKABLE; 723 *result = cap; 724 break; 725 case PSM_INTR_OP_GET_SHARED: 726 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 727 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 728 return (PSM_FAILURE); 729 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 730 == NULL) 731 return (PSM_FAILURE); 732 *result = irqp->airq_share ? 1: 0; 733 } else { 734 return (PSM_FAILURE); 735 } 736 break; 737 case PSM_INTR_OP_SET_PRI: 738 new_priority = *(int *)result; 739 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 740 if (err != 0) 741 return (PSM_FAILURE); 742 break; 743 case PSM_INTR_OP_GET_INTR: 744 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 745 return (PSM_FAILURE); 746 /* 747 * The interrupt handle given here has been allocated 748 * specifically for this command, and ih_private carries 749 * a pointer to a apic_get_intr_t. 750 */ 751 if (apic_get_vector_intr_info( 752 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 753 return (PSM_FAILURE); 754 break; 755 case PSM_INTR_OP_SET_CAP: 756 /* FALLTHRU */ 757 default: 758 return (PSM_FAILURE); 759 } 760 return (PSM_SUCCESS); 761 } 762 763 static void 764 xen_psm_rebind_irq(int irq) 765 { 766 cpuset_t ncpu; 767 processorid_t newcpu; 768 apic_irq_t *irqptr; 769 770 newcpu = xen_psm_bind_intr(irq); 771 if (newcpu == IRQ_UNBOUND) { 772 CPUSET_ZERO(ncpu); 773 CPUSET_OR(ncpu, xen_psm_cpus_online); 774 } else { 775 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 776 } 777 ec_set_irq_affinity(irq, ncpu); 778 if (irq <= APIC_MAX_VECTOR) { 779 irqptr = apic_irq_table[irq]; 780 ASSERT(irqptr != NULL); 781 irqptr->airq_temp_cpu = (uchar_t)newcpu; 782 } 783 } 784 785 /* 786 * Disable all device interrupts for the given cpu. 787 * High priority interrupts are not disabled and will still be serviced. 788 */ 789 static int 790 xen_psm_disable_intr(processorid_t cpun) 791 { 792 int irq; 793 794 /* 795 * Can't offline VCPU 0 on this hypervisor. There's no reason 796 * anyone would want to given that the CPUs are virtual. Also note 797 * that the hypervisor requires suspend/resume to be on VCPU 0. 798 */ 799 if (cpun == 0) 800 return (PSM_FAILURE); 801 802 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 803 for (irq = 0; irq < NR_IRQS; irq++) { 804 if (!ec_irq_needs_rebind(irq, cpun)) 805 continue; 806 xen_psm_rebind_irq(irq); 807 } 808 return (PSM_SUCCESS); 809 } 810 811 static void 812 xen_psm_enable_intr(processorid_t cpun) 813 { 814 int irq; 815 816 if (cpun == 0) 817 return; 818 819 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 820 821 /* 822 * Rebalance device interrupts among online processors 823 */ 824 for (irq = 0; irq < NR_IRQS; irq++) { 825 if (!ec_irq_rebindable(irq)) 826 continue; 827 xen_psm_rebind_irq(irq); 828 } 829 830 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 831 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 832 } 833 } 834 835 static int 836 xen_psm_post_cpu_start() 837 { 838 processorid_t cpun; 839 840 cpun = psm_get_cpu_id(); 841 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 842 /* 843 * Non-virtualized environments can call psm_post_cpu_start 844 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. 845 * xen_psm_post_cpu_start() is only called from boot. 846 */ 847 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; 848 } 849 return (PSM_SUCCESS); 850 } 851 852 /* 853 * This function will reprogram the timer. 854 * 855 * When in oneshot mode the argument is the absolute time in future at which to 856 * generate the interrupt. 857 * 858 * When in periodic mode, the argument is the interval at which the 859 * interrupts should be generated. There is no need to support the periodic 860 * mode timer change at this time. 861 * 862 * Note that we must be careful to convert from hrtime to Xen system time (see 863 * xpv_timestamp.c). 864 */ 865 static void 866 xen_psm_timer_reprogram(hrtime_t timer_req) 867 { 868 hrtime_t now, timer_new, time_delta, xen_time; 869 ulong_t flags; 870 871 flags = intr_clear(); 872 /* 873 * We should be called from high PIL context (CBE_HIGH_PIL), 874 * so kpreempt is disabled. 875 */ 876 877 now = xpv_gethrtime(); 878 xen_time = xpv_getsystime(); 879 if (timer_req <= now) { 880 /* 881 * requested to generate an interrupt in the past 882 * generate an interrupt as soon as possible 883 */ 884 time_delta = XEN_NSEC_PER_TICK; 885 } else 886 time_delta = timer_req - now; 887 888 timer_new = xen_time + time_delta; 889 if (HYPERVISOR_set_timer_op(timer_new) != 0) 890 panic("can't set hypervisor timer?"); 891 intr_restore(flags); 892 } 893 894 /* 895 * This function will enable timer interrupts. 896 */ 897 static void 898 xen_psm_timer_enable(void) 899 { 900 ec_unmask_irq(xen_clock_irq); 901 } 902 903 /* 904 * This function will disable timer interrupts on the current cpu. 905 */ 906 static void 907 xen_psm_timer_disable(void) 908 { 909 (void) ec_block_irq(xen_clock_irq); 910 /* 911 * If the clock irq is pending on this cpu then we need to 912 * clear the pending interrupt. 913 */ 914 ec_unpend_irq(xen_clock_irq); 915 } 916 917 /* 918 * 919 * The following functions are in the platform specific file so that they 920 * can be different functions depending on whether we are running on 921 * bare metal or a hypervisor. 922 */ 923 924 /* 925 * Allocate a free vector for irq at ipl. 926 */ 927 /* ARGSUSED */ 928 uchar_t 929 apic_allocate_vector(int ipl, int irq, int pri) 930 { 931 physdev_irq_t irq_op; 932 uchar_t vector; 933 934 irq_op.irq = irq; 935 936 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 937 panic("Hypervisor alloc vector failed"); 938 vector = irq_op.vector; 939 /* 940 * No need to worry about vector colliding with our reserved vectors 941 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 942 * generated traps and handle them properly. 943 */ 944 apic_vector_to_irq[vector] = (uchar_t)irq; 945 return (vector); 946 } 947 948 /* Mark vector as not being used by any irq */ 949 void 950 apic_free_vector(uchar_t vector) 951 { 952 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 953 } 954 955 /* 956 * This function allocate "count" vector(s) for the given "dip/pri/type" 957 */ 958 static int 959 apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, 960 int behavior) 961 { 962 int rcount, i; 963 uchar_t vector, cpu; 964 int irqno; 965 major_t major; 966 apic_irq_t *irqptr; 967 968 /* only supports MSI at the moment, will add MSI-X support later */ 969 if (type != DDI_INTR_TYPE_MSI) 970 return (0); 971 972 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d " 973 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 974 (void *)dip, type, inum, pri, count, behavior)); 975 976 if (count > 1) { 977 if (behavior == DDI_INTR_ALLOC_STRICT && 978 (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) 979 return (0); 980 981 if (apic_multi_msi_enable == 0) 982 count = 1; 983 else if (count > apic_multi_msi_max) 984 count = apic_multi_msi_max; 985 } 986 987 /* 988 * XXPV - metal version takes all vectors avail at given pri. 989 * Why do that? For now just allocate count vectors. 990 */ 991 rcount = count; 992 993 mutex_enter(&airq_mutex); 994 995 /* 996 * XXPV - currently the hypervisor does not support MSI at all. 997 * It doesn't return consecutive vectors. This code is a first 998 * cut for the (future) time that MSI is supported. 999 */ 1000 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 1001 for (i = 0; i < rcount; i++) { 1002 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == 1003 INVALID_IRQ) { 1004 mutex_exit(&airq_mutex); 1005 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1006 "apic_allocate_irq failed\n")); 1007 return (i); 1008 } 1009 apic_max_device_irq = max(irqno, apic_max_device_irq); 1010 apic_min_device_irq = min(irqno, apic_min_device_irq); 1011 irqptr = apic_irq_table[irqno]; 1012 vector = apic_allocate_vector(pri, irqno, 0); 1013 apic_vector_to_irq[vector] = (uchar_t)irqno; 1014 #ifdef DEBUG 1015 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1016 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1017 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1018 #endif 1019 1020 irqptr->airq_vector = vector; 1021 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1022 irqptr->airq_intin_no = (uchar_t)rcount; 1023 irqptr->airq_ipl = pri; 1024 irqptr->airq_origirq = (uchar_t)(inum + i); 1025 irqptr->airq_share_id = 0; 1026 irqptr->airq_mps_intr_index = MSI_INDEX; 1027 irqptr->airq_dip = dip; 1028 irqptr->airq_major = major; 1029 if (i == 0) /* they all bound to the same cpu */ 1030 cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno, 1031 0xff, 0xff); 1032 else 1033 irqptr->airq_cpu = cpu; 1034 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x " 1035 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1036 (void *)irqptr->airq_dip, irqptr->airq_vector, 1037 irqptr->airq_origirq, pri)); 1038 } 1039 mutex_exit(&airq_mutex); 1040 return (rcount); 1041 } 1042 1043 /* 1044 * The hypervisor doesn't permit access to local apics directly 1045 */ 1046 /* ARGSUSED */ 1047 uint32_t * 1048 mapin_apic(uint32_t addr, size_t len, int flags) 1049 { 1050 /* 1051 * Return a pointer to a memory area to fake out the 1052 * probe code that wants to read apic registers. 1053 * The dummy values will end up being ignored by xen 1054 * later on when they are used anyway. 1055 */ 1056 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1057 return (xen_psm_dummy_apic); 1058 } 1059 1060 /* ARGSUSED */ 1061 uint32_t * 1062 mapin_ioapic(uint32_t addr, size_t len, int flags) 1063 { 1064 /* 1065 * Return non-null here to fake out configure code that calls this. 1066 * The i86xpv platform will not reference through the returned value.. 1067 */ 1068 return ((uint32_t *)0x1); 1069 } 1070 1071 /* ARGSUSED */ 1072 void 1073 mapout_apic(caddr_t addr, size_t len) 1074 { 1075 } 1076 1077 /* ARGSUSED */ 1078 void 1079 mapout_ioapic(caddr_t addr, size_t len) 1080 { 1081 } 1082 1083 uint32_t 1084 ioapic_read(int apic_ix, uint32_t reg) 1085 { 1086 physdev_apic_t apic; 1087 1088 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1089 apic.reg = reg; 1090 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1091 panic("read ioapic %d reg %d failed", apic_ix, reg); 1092 return (apic.value); 1093 } 1094 1095 void 1096 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1097 { 1098 physdev_apic_t apic; 1099 1100 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1101 apic.reg = reg; 1102 apic.value = value; 1103 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1104 panic("write ioapic %d reg %d failed", apic_ix, reg); 1105 } 1106 1107 /* 1108 * Call rebind to do the actual programming. 1109 */ 1110 int 1111 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1112 { 1113 apic_irq_t *irqptr; 1114 struct ioapic_reprogram_data *drep = NULL; 1115 int rv, cpu; 1116 cpuset_t cpus; 1117 1118 /* 1119 * Set cpu based on xen idea of online cpu's not apic tables. 1120 * Note that xen ignores/sets to it's own preferred value the 1121 * target cpu field when programming ioapic anyway. 1122 */ 1123 if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) { 1124 CPUSET_ZERO(cpus); 1125 CPUSET_OR(cpus, xen_psm_cpus_online); 1126 } else { 1127 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1128 } 1129 apic_irq_table[irq]->airq_cpu = cpu; 1130 if (deferred) { 1131 drep = (struct ioapic_reprogram_data *)p; 1132 ASSERT(drep != NULL); 1133 irqptr = drep->irqp; 1134 } else { 1135 irqptr = (apic_irq_t *)p; 1136 } 1137 ASSERT(irqptr != NULL); 1138 rv = apic_rebind(irqptr, cpu, drep); 1139 if (rv) { 1140 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1141 cpu = 0; 1142 rv = apic_rebind(irqptr, cpu, drep); 1143 } 1144 /* 1145 * If rebind successful bind the irq to an event channel 1146 */ 1147 if (rv == 0) { 1148 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1149 CPUSET_FIND(cpus, cpu); 1150 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1151 } 1152 return (rv); 1153 } 1154 1155 /* 1156 * Allocate a new vector for the given irq 1157 */ 1158 /* ARGSUSED */ 1159 uchar_t 1160 apic_modify_vector(uchar_t vector, int irq) 1161 { 1162 return (apic_allocate_vector(0, irq, 0)); 1163 } 1164 1165 /* 1166 * The rest of the file is just generic psm module boilerplate 1167 */ 1168 1169 static struct psm_ops xen_psm_ops = { 1170 xen_psm_probe, /* psm_probe */ 1171 1172 xen_psm_softinit, /* psm_init */ 1173 xen_psm_picinit, /* psm_picinit */ 1174 xen_psm_intr_enter, /* psm_intr_enter */ 1175 xen_psm_intr_exit, /* psm_intr_exit */ 1176 xen_psm_setspl, /* psm_setspl */ 1177 xen_psm_addspl, /* psm_addspl */ 1178 xen_psm_delspl, /* psm_delspl */ 1179 xen_psm_disable_intr, /* psm_disable_intr */ 1180 xen_psm_enable_intr, /* psm_enable_intr */ 1181 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1182 (void (*)(int))NULL, /* psm_set_softintr */ 1183 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1184 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1185 1186 xen_psm_clkinit, /* psm_clkinit */ 1187 xen_psm_get_clockirq, /* psm_get_clockirq */ 1188 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1189 xpv_gethrtime, /* psm_gethrtime */ 1190 1191 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1192 xen_psm_cpu_start, /* psm_cpu_start */ 1193 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1194 xen_psm_shutdown, /* psm_shutdown */ 1195 xen_psm_get_ipivect, /* psm_get_ipivect */ 1196 xen_psm_send_ipi, /* psm_send_ipi */ 1197 1198 xen_psm_translate_irq, /* psm_translate_irq */ 1199 1200 (void (*)(int, char *))NULL, /* psm_notify_error */ 1201 (void (*)(int msg))NULL, /* psm_notify_func */ 1202 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1203 xen_psm_timer_enable, /* psm_timer_enable */ 1204 xen_psm_timer_disable, /* psm_timer_disable */ 1205 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1206 (void (*)(int, int))NULL, /* psm_preshutdown */ 1207 xen_intr_ops /* Advanced DDI Interrupt framework */ 1208 }; 1209 1210 static struct psm_info xen_psm_info = { 1211 PSM_INFO_VER01_5, /* version */ 1212 PSM_OWN_EXCLUSIVE, /* ownership */ 1213 &xen_psm_ops, /* operation */ 1214 "xVM_psm", /* machine name */ 1215 "platform module %I%" /* machine descriptions */ 1216 }; 1217 1218 static void *xen_psm_hdlp; 1219 1220 int 1221 _init(void) 1222 { 1223 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1224 } 1225 1226 int 1227 _fini(void) 1228 { 1229 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1230 } 1231 1232 int 1233 _info(struct modinfo *modinfop) 1234 { 1235 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1236 } 1237