1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #define PSMI_1_5 30 31 #include <sys/mutex.h> 32 #include <sys/types.h> 33 #include <sys/time.h> 34 #include <sys/clock.h> 35 #include <sys/machlock.h> 36 #include <sys/smp_impldefs.h> 37 #include <sys/uadmin.h> 38 #include <sys/promif.h> 39 #include <sys/psm.h> 40 #include <sys/psm_common.h> 41 #include <sys/atomic.h> 42 #include <sys/apic.h> 43 #include <sys/archsystm.h> 44 #include <sys/mach_intr.h> 45 #include <sys/hypervisor.h> 46 #include <sys/evtchn_impl.h> 47 #include <sys/modctl.h> 48 #include <sys/trap.h> 49 #include <sys/panic.h> 50 51 #include <xen/public/vcpu.h> 52 #include <xen/public/physdev.h> 53 54 55 /* 56 * Global Data 57 */ 58 59 int xen_psm_verbose = 0; 60 61 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 62 int apic_error = 0; 63 int apic_verbose = 0; 64 cpuset_t apic_cpumask; 65 int apic_forceload = 0; 66 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 67 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 68 }; 69 uchar_t apic_ipltopri[MAXIPL + 1]; 70 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 71 uint_t apic_picinit_called; 72 apic_cpus_info_t *apic_cpus; 73 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 74 /* use to make sure only one cpu handles the nmi */ 75 static lock_t xen_psm_nmi_lock; 76 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 77 int xen_psm_panic_on_nmi = 0; 78 int xen_psm_num_nmis = 0; 79 80 cpuset_t xen_psm_cpus_online; /* online cpus */ 81 int xen_psm_ncpus = 1; /* cpu count */ 82 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 83 84 /* 85 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't 86 * support MSI at all. Change this initialization to zero when MSI is 87 * supported. 88 */ 89 int xen_support_msi = -1; 90 91 static int xen_clock_irq = INVALID_IRQ; 92 93 /* flag definitions for xen_psm_verbose */ 94 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 95 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 96 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 97 98 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 99 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 100 cmn_err fmt; 101 102 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 103 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 104 prom_printf fmt; 105 106 /* 107 * Dummy apic array to point common routines at that want to do some apic 108 * manipulation. Xen doesn't allow guest apic access so we point at these 109 * memory locations to fake out those who want to do apic fiddling. 110 */ 111 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 112 113 static struct psm_info xen_psm_info; 114 static void xen_psm_setspl(int); 115 116 static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int); 117 118 /* 119 * Local support routines 120 */ 121 122 /* 123 * Select vcpu to bind xen virtual device interrupt to. 124 */ 125 /*ARGSUSED*/ 126 int 127 xen_psm_bind_intr(int irq) 128 { 129 int bind_cpu, test_cpu; 130 apic_irq_t *irqptr; 131 132 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 133 return (IRQ_UNBOUND); 134 if (irq <= APIC_MAX_VECTOR) 135 irqptr = apic_irq_table[irq]; 136 else 137 irqptr = NULL; 138 if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) { 139 bind_cpu = irqptr->airq_cpu; 140 test_cpu = bind_cpu & ~IRQ_USER_BOUND; 141 if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu)) 142 bind_cpu = 0; 143 goto done; 144 } 145 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 146 do { 147 bind_cpu = xen_psm_next_bind_cpu++; 148 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 149 xen_psm_next_bind_cpu = 0; 150 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 151 } else { 152 bind_cpu = 0; 153 } 154 done: 155 return (bind_cpu); 156 } 157 158 /* 159 * Autoconfiguration Routines 160 */ 161 162 static int 163 xen_psm_probe(void) 164 { 165 int ret = PSM_SUCCESS; 166 167 if (DOMAIN_IS_INITDOMAIN(xen_info)) 168 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 169 return (ret); 170 } 171 172 static void 173 xen_psm_softinit(void) 174 { 175 /* LINTED logical expression always true: op "||" */ 176 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 177 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 178 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 179 apic_init_common(); 180 } 181 } 182 183 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 184 185 /*ARGSUSED*/ 186 static int 187 xen_psm_clkinit(int hertz) 188 { 189 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 190 extern int dosynctodr; 191 192 /* 193 * domU cannot set the TOD hardware, fault the TOD clock now to 194 * indicate that and turn off attempts to sync TOD hardware 195 * with the hires timer. 196 */ 197 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 198 mutex_enter(&tod_lock); 199 (void) tod_fault(TOD_RDONLY, 0); 200 dosynctodr = 0; 201 mutex_exit(&tod_lock); 202 } 203 /* 204 * The hypervisor provides a timer based on the local APIC timer. 205 * The interface supports requests of nanosecond resolution. 206 * A common frequency of the apic clock is 100 Mhz which 207 * gives a resolution of 10 nsec per tick. What we would really like 208 * is a way to get the ns per tick value from xen. 209 * XXPV - This is an assumption that needs checking and may change 210 */ 211 return (XEN_NSEC_PER_TICK); 212 } 213 214 static void 215 xen_psm_hrtimeinit(void) 216 { 217 extern int gethrtime_hires; 218 gethrtime_hires = 1; 219 } 220 221 /* xen_psm NMI handler */ 222 /*ARGSUSED*/ 223 static void 224 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 225 { 226 xen_psm_num_nmis++; 227 228 if (!lock_try(&xen_psm_nmi_lock)) 229 return; 230 231 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 232 debug_enter("NMI received: entering kmdb\n"); 233 } else if (xen_psm_panic_on_nmi) { 234 /* Keep panic from entering kmdb. */ 235 nopanicdebug = 1; 236 panic("NMI received\n"); 237 } else { 238 /* 239 * prom_printf is the best shot we have of something which is 240 * problem free from high level/NMI type of interrupts 241 */ 242 prom_printf("NMI received\n"); 243 } 244 245 lock_clear(&xen_psm_nmi_lock); 246 } 247 248 static void 249 xen_psm_picinit() 250 { 251 int cpu, irqno; 252 cpuset_t cpus; 253 254 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 255 /* set a flag so we know we have run xen_psm_picinit() */ 256 apic_picinit_called = 1; 257 LOCK_INIT_CLEAR(&apic_ioapic_lock); 258 259 /* XXPV - do we need to do this? */ 260 picsetup(); /* initialise the 8259 */ 261 262 /* enable apic mode if imcr present */ 263 /* XXPV - do we need to do this either? */ 264 if (apic_imcrp) { 265 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 266 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 267 } 268 269 ioapic_init_intr(IOAPIC_NOMASK); 270 /* 271 * We never called xen_psm_addspl() when the SCI 272 * interrupt was added because that happened before the 273 * PSM module was loaded. Fix that up here by doing 274 * any missed operations (e.g. bind to CPU) 275 */ 276 if ((irqno = apic_sci_vect) > 0) { 277 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 278 CPUSET_ZERO(cpus); 279 CPUSET_OR(cpus, xen_psm_cpus_online); 280 } else { 281 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 282 } 283 ec_set_irq_affinity(irqno, cpus); 284 apic_irq_table[irqno]->airq_temp_cpu = 285 (uchar_t)(cpu & ~IRQ_USER_BOUND); 286 ec_enable_irq(irqno); 287 } 288 } 289 290 /* add nmi handler - least priority nmi handler */ 291 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 292 293 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 294 "xVM_psm NMI handler", (caddr_t)NULL)) 295 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 296 } 297 298 299 /* 300 * generates an interprocessor interrupt to another CPU 301 */ 302 static void 303 xen_psm_send_ipi(int cpun, int ipl) 304 { 305 ulong_t flag = intr_clear(); 306 307 ec_send_ipi(ipl, cpun); 308 intr_restore(flag); 309 } 310 311 /*ARGSUSED*/ 312 static int 313 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 314 { 315 int cpu, ret; 316 cpuset_t cpus; 317 318 /* 319 * We are called at splhi() so we can't call anything that might end 320 * up trying to context switch. 321 */ 322 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 323 DOMAIN_IS_INITDOMAIN(xen_info)) { 324 /* 325 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 326 */ 327 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 328 } else { 329 /* 330 * Set priority/affinity/enable for non PIRQs 331 */ 332 ret = ec_set_irq_priority(irqno, ipl); 333 ASSERT(ret == 0); 334 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 335 CPUSET_ZERO(cpus); 336 CPUSET_OR(cpus, xen_psm_cpus_online); 337 } else { 338 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 339 } 340 ec_set_irq_affinity(irqno, cpus); 341 ec_enable_irq(irqno); 342 } 343 return (ret); 344 } 345 346 /* 347 * Acquire ownership of this irq on this cpu 348 */ 349 void 350 xen_psm_acquire_irq(int irq) 351 { 352 ulong_t flags; 353 int cpuid; 354 355 /* 356 * If the irq is currently being serviced by another cpu 357 * we busy-wait for the other cpu to finish. Take any 358 * pending interrupts before retrying. 359 */ 360 do { 361 flags = intr_clear(); 362 cpuid = ec_block_irq(irq); 363 intr_restore(flags); 364 } while (cpuid != CPU->cpu_id); 365 } 366 367 /*ARGSUSED*/ 368 static int 369 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 370 { 371 apic_irq_t *irqptr; 372 int err = PSM_SUCCESS; 373 374 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 375 DOMAIN_IS_INITDOMAIN(xen_info)) { 376 irqptr = apic_irq_table[irqno]; 377 /* 378 * unbind if no more sharers of this irq/evtchn 379 */ 380 if (irqptr->airq_share == 1) { 381 xen_psm_acquire_irq(irqno); 382 ec_unbind_irq(irqno); 383 } 384 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 385 /* 386 * If still in use reset priority 387 */ 388 if (!err && irqptr->airq_share != 0) { 389 err = ec_set_irq_priority(irqno, max_ipl); 390 return (err); 391 } 392 } else { 393 xen_psm_acquire_irq(irqno); 394 ec_unbind_irq(irqno); 395 } 396 return (err); 397 } 398 399 static processorid_t 400 xen_psm_get_next_processorid(processorid_t id) 401 { 402 if (id == -1) 403 return (0); 404 405 for (id++; id < NCPU; id++) { 406 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 407 case 0: /* yeah, that one's there */ 408 return (id); 409 default: 410 case X_EINVAL: /* out of range */ 411 return (-1); 412 case X_ENOENT: /* not present in the domain */ 413 /* 414 * It's not clear that we -need- to keep looking 415 * at this point, if, e.g., we can guarantee 416 * the hypervisor always keeps a contiguous range 417 * of vcpus around this is equivalent to "out of range". 418 * 419 * But it would be sad to miss a vcpu we're 420 * supposed to be using .. 421 */ 422 break; 423 } 424 } 425 426 return (-1); 427 } 428 429 /* 430 * XXPV - undo the start cpu op change; return to ignoring this value 431 * - also tweak error handling in main startup loop 432 */ 433 /*ARGSUSED*/ 434 static int 435 xen_psm_cpu_start(processorid_t id, caddr_t arg) 436 { 437 int ret; 438 439 ASSERT(id > 0); 440 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 441 ec_bind_cpu_ipis(id); 442 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 443 if ((ret = xen_vcpu_up(id)) == 0) 444 xen_psm_ncpus++; 445 else 446 ret = EINVAL; 447 return (ret); 448 } 449 450 /* 451 * Allocate an irq for inter cpu signaling 452 */ 453 /*ARGSUSED*/ 454 static int 455 xen_psm_get_ipivect(int ipl, int type) 456 { 457 return (ec_bind_ipi_to_irq(ipl, 0)); 458 } 459 460 /*ARGSUSED*/ 461 static int 462 xen_psm_get_clockirq(int ipl) 463 { 464 if (xen_clock_irq != INVALID_IRQ) 465 return (xen_clock_irq); 466 467 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 468 return (xen_clock_irq); 469 } 470 471 /*ARGSUSED*/ 472 static void 473 xen_psm_shutdown(int cmd, int fcn) 474 { 475 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 476 477 switch (cmd) { 478 case A_SHUTDOWN: 479 switch (fcn) { 480 case AD_BOOT: 481 case AD_IBOOT: 482 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 483 break; 484 case AD_POWEROFF: 485 /* fall through if domU or if poweroff fails */ 486 if (DOMAIN_IS_INITDOMAIN(xen_info)) 487 if (apic_enable_acpi) 488 (void) acpi_poweroff(); 489 /* FALLTHRU */ 490 case AD_HALT: 491 default: 492 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 493 break; 494 } 495 break; 496 case A_REBOOT: 497 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 498 break; 499 default: 500 return; 501 } 502 } 503 504 505 static int 506 xen_psm_translate_irq(dev_info_t *dip, int irqno) 507 { 508 if (dip == NULL) { 509 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 510 " dip = NULL\n", irqno)); 511 return (irqno); 512 } 513 return (irqno); 514 } 515 516 /* 517 * xen_psm_intr_enter() acks the event that triggered the interrupt and 518 * returns the new priority level, 519 */ 520 /*ARGSUSED*/ 521 static int 522 xen_psm_intr_enter(int ipl, int *vector) 523 { 524 int newipl; 525 uint_t intno; 526 cpu_t *cpu = CPU; 527 528 intno = (*vector); 529 530 ASSERT(intno < NR_IRQS); 531 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 532 533 ec_clear_irq(intno); 534 535 newipl = autovect[intno].avh_hi_pri; 536 if (newipl == 0) { 537 /* 538 * (newipl == 0) means we have no service routines for this 539 * vector. We will treat this as a spurious interrupt. 540 * We have cleared the pending bit already, clear the event 541 * mask and return a spurious interrupt. This case can happen 542 * when an interrupt delivery is racing with the removal of 543 * of the service routine for that interrupt. 544 */ 545 ec_unmask_irq(intno); 546 newipl = -1; /* flag spurious interrupt */ 547 } else if (newipl <= cpu->cpu_pri) { 548 /* 549 * (newipl <= cpu->cpu_pri) means that we must be trying to 550 * service a vector that was shared with a higher priority 551 * isr. The higher priority handler has been removed and 552 * we need to service this int. We can't return a lower 553 * priority than current cpu priority. Just synthesize a 554 * priority to return that should be acceptable. 555 */ 556 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 557 } 558 return (newipl); 559 } 560 561 562 /* 563 * xen_psm_intr_exit() restores the old interrupt 564 * priority level after processing an interrupt. 565 * It is called with interrupts disabled, and does not enable interrupts. 566 */ 567 /* ARGSUSED */ 568 static void 569 xen_psm_intr_exit(int ipl, int vector) 570 { 571 ec_try_unmask_irq(vector); 572 xen_psm_setspl(ipl); 573 } 574 575 intr_exit_fn_t 576 psm_intr_exit_fn(void) 577 { 578 return (xen_psm_intr_exit); 579 } 580 581 /* 582 * Check if new ipl level allows delivery of previously unserviced events 583 */ 584 static void 585 xen_psm_setspl(int ipl) 586 { 587 struct cpu *cpu = CPU; 588 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 589 uint16_t pending; 590 591 ASSERT(vci->evtchn_upcall_mask != 0); 592 593 /* 594 * If new ipl level will enable any pending interrupts, setup so the 595 * upcoming sti will cause us to get an upcall. 596 */ 597 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 598 if (pending) { 599 int i; 600 ulong_t pending_sels = 0; 601 volatile ulong_t *selp; 602 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 603 604 for (i = bsrw_insn(pending); i > ipl; i--) 605 pending_sels |= cpe->pending_sel[i]; 606 ASSERT(pending_sels); 607 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 608 atomic_or_ulong(selp, pending_sels); 609 vci->evtchn_upcall_pending = 1; 610 } 611 } 612 613 /* 614 * This function provides external interface to the nexus for all 615 * functionality related to the new DDI interrupt framework. 616 * 617 * Input: 618 * dip - pointer to the dev_info structure of the requested device 619 * hdlp - pointer to the internal interrupt handle structure for the 620 * requested interrupt 621 * intr_op - opcode for this call 622 * result - pointer to the integer that will hold the result to be 623 * passed back if return value is PSM_SUCCESS 624 * 625 * Output: 626 * return value is either PSM_SUCCESS or PSM_FAILURE 627 */ 628 int 629 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 630 psm_intr_op_t intr_op, int *result) 631 { 632 int cap; 633 int err; 634 int new_priority; 635 apic_irq_t *irqp; 636 struct intrspec *ispec; 637 638 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 639 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 640 641 switch (intr_op) { 642 case PSM_INTR_OP_CHECK_MSI: 643 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 644 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 645 DDI_INTR_TYPE_MSIX); 646 break; 647 } 648 /* 649 * Check MSI/X is supported or not at APIC level and 650 * masked off the MSI/X bits in hdlp->ih_type if not 651 * supported before return. If MSI/X is supported, 652 * leave the ih_type unchanged and return. 653 * 654 * hdlp->ih_type passed in from the nexus has all the 655 * interrupt types supported by the device. 656 */ 657 if (xen_support_msi == 0) { 658 /* 659 * if xen_support_msi is not set, call 660 * apic_check_msi_support() to check whether msi 661 * is supported first 662 */ 663 if (apic_check_msi_support() == PSM_SUCCESS) 664 xen_support_msi = 1; 665 else 666 xen_support_msi = -1; 667 } 668 if (xen_support_msi == 1) 669 *result = hdlp->ih_type; 670 else 671 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 672 DDI_INTR_TYPE_MSIX); 673 break; 674 case PSM_INTR_OP_ALLOC_VECTORS: 675 *result = apic_alloc_vectors(dip, hdlp->ih_inum, 676 hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type, 677 (int)(uintptr_t)hdlp->ih_scratch2); 678 break; 679 case PSM_INTR_OP_FREE_VECTORS: 680 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 681 hdlp->ih_pri, hdlp->ih_type); 682 break; 683 case PSM_INTR_OP_NAVAIL_VECTORS: 684 /* 685 * XXPV - maybe we should make this be: 686 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 687 */ 688 if (DOMAIN_IS_INITDOMAIN(xen_info)) 689 *result = APIC_VECTOR_PER_IPL; 690 else 691 *result = 1; 692 break; 693 case PSM_INTR_OP_XLATE_VECTOR: 694 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 695 if (ispec->intrspec_vec >= PIRQ_BASE && 696 ispec->intrspec_vec < NR_PIRQS && 697 DOMAIN_IS_INITDOMAIN(xen_info)) { 698 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 699 } else { 700 *result = ispec->intrspec_vec; 701 } 702 break; 703 case PSM_INTR_OP_GET_PENDING: 704 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 705 *result = ec_pending_irq(hdlp->ih_vector); 706 break; 707 case PSM_INTR_OP_CLEAR_MASK: 708 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 709 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 710 return (PSM_FAILURE); 711 ec_enable_irq(hdlp->ih_vector); 712 break; 713 case PSM_INTR_OP_SET_MASK: 714 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 715 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 716 return (PSM_FAILURE); 717 ec_disable_irq(hdlp->ih_vector); 718 break; 719 case PSM_INTR_OP_GET_CAP: 720 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 721 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 722 cap |= DDI_INTR_FLAG_MASKABLE; 723 *result = cap; 724 break; 725 case PSM_INTR_OP_GET_SHARED: 726 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 727 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 728 return (PSM_FAILURE); 729 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 730 == NULL) 731 return (PSM_FAILURE); 732 *result = irqp->airq_share ? 1: 0; 733 } else { 734 return (PSM_FAILURE); 735 } 736 break; 737 case PSM_INTR_OP_SET_PRI: 738 new_priority = *(int *)result; 739 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 740 if (err != 0) 741 return (PSM_FAILURE); 742 break; 743 case PSM_INTR_OP_GET_INTR: 744 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 745 return (PSM_FAILURE); 746 /* 747 * The interrupt handle given here has been allocated 748 * specifically for this command, and ih_private carries 749 * a pointer to a apic_get_intr_t. 750 */ 751 if (apic_get_vector_intr_info( 752 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 753 return (PSM_FAILURE); 754 break; 755 case PSM_INTR_OP_SET_CAP: 756 /* FALLTHRU */ 757 default: 758 return (PSM_FAILURE); 759 } 760 return (PSM_SUCCESS); 761 } 762 763 static void 764 xen_psm_rebind_irq(int irq) 765 { 766 cpuset_t ncpu; 767 processorid_t newcpu; 768 apic_irq_t *irqptr; 769 770 newcpu = xen_psm_bind_intr(irq); 771 if (newcpu == IRQ_UNBOUND) { 772 CPUSET_ZERO(ncpu); 773 CPUSET_OR(ncpu, xen_psm_cpus_online); 774 } else { 775 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 776 } 777 ec_set_irq_affinity(irq, ncpu); 778 if (irq <= APIC_MAX_VECTOR) { 779 irqptr = apic_irq_table[irq]; 780 ASSERT(irqptr != NULL); 781 irqptr->airq_temp_cpu = (uchar_t)newcpu; 782 } 783 } 784 785 /* 786 * Disable all device interrupts for the given cpu. 787 * High priority interrupts are not disabled and will still be serviced. 788 */ 789 static int 790 xen_psm_disable_intr(processorid_t cpun) 791 { 792 int irq; 793 794 /* 795 * Can't offline VCPU 0 on this hypervisor. There's no reason 796 * anyone would want to given that the CPUs are virtual. Also note 797 * that the hypervisor requires suspend/resume to be on VCPU 0. 798 */ 799 if (cpun == 0) 800 return (PSM_FAILURE); 801 802 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 803 for (irq = 0; irq < NR_IRQS; irq++) { 804 if (!ec_irq_needs_rebind(irq, cpun)) 805 continue; 806 xen_psm_rebind_irq(irq); 807 } 808 return (PSM_SUCCESS); 809 } 810 811 static void 812 xen_psm_enable_intr(processorid_t cpun) 813 { 814 int irq; 815 816 if (cpun == 0) 817 return; 818 819 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 820 821 /* 822 * Rebalance device interrupts among online processors 823 */ 824 for (irq = 0; irq < NR_IRQS; irq++) { 825 if (!ec_irq_rebindable(irq)) 826 continue; 827 xen_psm_rebind_irq(irq); 828 } 829 } 830 831 static int 832 xen_psm_post_cpu_start() 833 { 834 processorid_t cpun; 835 836 cpun = psm_get_cpu_id(); 837 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 838 apic_cpus[cpun].aci_status = 839 APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 840 } 841 /* 842 * Re-distribute interrupts to include the newly added cpu. 843 */ 844 xen_psm_enable_intr(cpun); 845 return (PSM_SUCCESS); 846 } 847 848 /* 849 * This function will reprogram the timer. 850 * 851 * When in oneshot mode the argument is the absolute time in future at which to 852 * generate the interrupt. 853 * 854 * When in periodic mode, the argument is the interval at which the 855 * interrupts should be generated. There is no need to support the periodic 856 * mode timer change at this time. 857 * 858 * Note that we must be careful to convert from hrtime to Xen system time (see 859 * xpv_timestamp.c). 860 */ 861 static void 862 xen_psm_timer_reprogram(hrtime_t timer_req) 863 { 864 hrtime_t now, timer_new, time_delta, xen_time; 865 ulong_t flags; 866 867 flags = intr_clear(); 868 /* 869 * We should be called from high PIL context (CBE_HIGH_PIL), 870 * so kpreempt is disabled. 871 */ 872 873 now = xpv_gethrtime(); 874 xen_time = xpv_getsystime(); 875 if (timer_req <= now) { 876 /* 877 * requested to generate an interrupt in the past 878 * generate an interrupt as soon as possible 879 */ 880 time_delta = XEN_NSEC_PER_TICK; 881 } else 882 time_delta = timer_req - now; 883 884 timer_new = xen_time + time_delta; 885 if (HYPERVISOR_set_timer_op(timer_new) != 0) 886 panic("can't set hypervisor timer?"); 887 intr_restore(flags); 888 } 889 890 /* 891 * This function will enable timer interrupts. 892 */ 893 static void 894 xen_psm_timer_enable(void) 895 { 896 ec_unmask_irq(xen_clock_irq); 897 } 898 899 /* 900 * This function will disable timer interrupts on the current cpu. 901 */ 902 static void 903 xen_psm_timer_disable(void) 904 { 905 (void) ec_block_irq(xen_clock_irq); 906 /* 907 * If the clock irq is pending on this cpu then we need to 908 * clear the pending interrupt. 909 */ 910 ec_unpend_irq(xen_clock_irq); 911 } 912 913 /* 914 * 915 * The following functions are in the platform specific file so that they 916 * can be different functions depending on whether we are running on 917 * bare metal or a hypervisor. 918 */ 919 920 /* 921 * Allocate a free vector for irq at ipl. 922 */ 923 /* ARGSUSED */ 924 uchar_t 925 apic_allocate_vector(int ipl, int irq, int pri) 926 { 927 physdev_irq_t irq_op; 928 uchar_t vector; 929 930 irq_op.irq = irq; 931 932 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 933 panic("Hypervisor alloc vector failed"); 934 vector = irq_op.vector; 935 /* 936 * No need to worry about vector colliding with our reserved vectors 937 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 938 * generated traps and handle them properly. 939 */ 940 apic_vector_to_irq[vector] = (uchar_t)irq; 941 return (vector); 942 } 943 944 /* Mark vector as not being used by any irq */ 945 void 946 apic_free_vector(uchar_t vector) 947 { 948 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 949 } 950 951 /* 952 * This function allocate "count" vector(s) for the given "dip/pri/type" 953 */ 954 static int 955 apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, 956 int behavior) 957 { 958 int rcount, i; 959 uchar_t vector, cpu; 960 int irqno; 961 major_t major; 962 apic_irq_t *irqptr; 963 964 /* only supports MSI at the moment, will add MSI-X support later */ 965 if (type != DDI_INTR_TYPE_MSI) 966 return (0); 967 968 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d " 969 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 970 (void *)dip, type, inum, pri, count, behavior)); 971 972 if (count > 1) { 973 if (behavior == DDI_INTR_ALLOC_STRICT && 974 (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) 975 return (0); 976 977 if (apic_multi_msi_enable == 0) 978 count = 1; 979 else if (count > apic_multi_msi_max) 980 count = apic_multi_msi_max; 981 } 982 983 /* 984 * XXPV - metal version takes all vectors avail at given pri. 985 * Why do that? For now just allocate count vectors. 986 */ 987 rcount = count; 988 989 mutex_enter(&airq_mutex); 990 991 /* 992 * XXPV - currently the hypervisor does not support MSI at all. 993 * It doesn't return consecutive vectors. This code is a first 994 * cut for the (future) time that MSI is supported. 995 */ 996 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 997 for (i = 0; i < rcount; i++) { 998 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == 999 INVALID_IRQ) { 1000 mutex_exit(&airq_mutex); 1001 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1002 "apic_allocate_irq failed\n")); 1003 return (i); 1004 } 1005 apic_max_device_irq = max(irqno, apic_max_device_irq); 1006 apic_min_device_irq = min(irqno, apic_min_device_irq); 1007 irqptr = apic_irq_table[irqno]; 1008 vector = apic_allocate_vector(pri, irqno, 0); 1009 apic_vector_to_irq[vector] = (uchar_t)irqno; 1010 #ifdef DEBUG 1011 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1012 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1013 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1014 #endif 1015 1016 irqptr->airq_vector = vector; 1017 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1018 irqptr->airq_intin_no = (uchar_t)rcount; 1019 irqptr->airq_ipl = pri; 1020 irqptr->airq_origirq = (uchar_t)(inum + i); 1021 irqptr->airq_share_id = 0; 1022 irqptr->airq_mps_intr_index = MSI_INDEX; 1023 irqptr->airq_dip = dip; 1024 irqptr->airq_major = major; 1025 if (i == 0) /* they all bound to the same cpu */ 1026 cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno, 1027 0xff, 0xff); 1028 else 1029 irqptr->airq_cpu = cpu; 1030 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x " 1031 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1032 (void *)irqptr->airq_dip, irqptr->airq_vector, 1033 irqptr->airq_origirq, pri)); 1034 } 1035 mutex_exit(&airq_mutex); 1036 return (rcount); 1037 } 1038 1039 /* 1040 * The hypervisor doesn't permit access to local apics directly 1041 */ 1042 /* ARGSUSED */ 1043 uint32_t * 1044 mapin_apic(uint32_t addr, size_t len, int flags) 1045 { 1046 /* 1047 * Return a pointer to a memory area to fake out the 1048 * probe code that wants to read apic registers. 1049 * The dummy values will end up being ignored by xen 1050 * later on when they are used anyway. 1051 */ 1052 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1053 return (xen_psm_dummy_apic); 1054 } 1055 1056 /* ARGSUSED */ 1057 uint32_t * 1058 mapin_ioapic(uint32_t addr, size_t len, int flags) 1059 { 1060 /* 1061 * Return non-null here to fake out configure code that calls this. 1062 * The i86xpv platform will not reference through the returned value.. 1063 */ 1064 return ((uint32_t *)0x1); 1065 } 1066 1067 /* ARGSUSED */ 1068 void 1069 mapout_apic(caddr_t addr, size_t len) 1070 { 1071 } 1072 1073 /* ARGSUSED */ 1074 void 1075 mapout_ioapic(caddr_t addr, size_t len) 1076 { 1077 } 1078 1079 uint32_t 1080 ioapic_read(int apic_ix, uint32_t reg) 1081 { 1082 physdev_apic_t apic; 1083 1084 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1085 apic.reg = reg; 1086 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1087 panic("read ioapic %d reg %d failed", apic_ix, reg); 1088 return (apic.value); 1089 } 1090 1091 void 1092 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1093 { 1094 physdev_apic_t apic; 1095 1096 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1097 apic.reg = reg; 1098 apic.value = value; 1099 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1100 panic("write ioapic %d reg %d failed", apic_ix, reg); 1101 } 1102 1103 /* 1104 * Call rebind to do the actual programming. 1105 */ 1106 int 1107 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1108 { 1109 apic_irq_t *irqptr; 1110 struct ioapic_reprogram_data *drep = NULL; 1111 int rv, cpu; 1112 cpuset_t cpus; 1113 1114 /* 1115 * Set cpu based on xen idea of online cpu's not apic tables. 1116 * Note that xen ignores/sets to it's own preferred value the 1117 * target cpu field when programming ioapic anyway. 1118 */ 1119 if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) { 1120 CPUSET_ZERO(cpus); 1121 CPUSET_OR(cpus, xen_psm_cpus_online); 1122 } else { 1123 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1124 } 1125 apic_irq_table[irq]->airq_cpu = cpu; 1126 if (deferred) { 1127 drep = (struct ioapic_reprogram_data *)p; 1128 ASSERT(drep != NULL); 1129 irqptr = drep->irqp; 1130 } else { 1131 irqptr = (apic_irq_t *)p; 1132 } 1133 ASSERT(irqptr != NULL); 1134 rv = apic_rebind(irqptr, cpu, drep); 1135 if (rv) { 1136 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1137 cpu = 0; 1138 rv = apic_rebind(irqptr, cpu, drep); 1139 } 1140 /* 1141 * If rebind successful bind the irq to an event channel 1142 */ 1143 if (rv == 0) { 1144 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1145 CPUSET_FIND(cpus, cpu); 1146 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1147 } 1148 return (rv); 1149 } 1150 1151 /* 1152 * Allocate a new vector for the given irq 1153 */ 1154 /* ARGSUSED */ 1155 uchar_t 1156 apic_modify_vector(uchar_t vector, int irq) 1157 { 1158 return (apic_allocate_vector(0, irq, 0)); 1159 } 1160 1161 /* 1162 * The rest of the file is just generic psm module boilerplate 1163 */ 1164 1165 static struct psm_ops xen_psm_ops = { 1166 xen_psm_probe, /* psm_probe */ 1167 1168 xen_psm_softinit, /* psm_init */ 1169 xen_psm_picinit, /* psm_picinit */ 1170 xen_psm_intr_enter, /* psm_intr_enter */ 1171 xen_psm_intr_exit, /* psm_intr_exit */ 1172 xen_psm_setspl, /* psm_setspl */ 1173 xen_psm_addspl, /* psm_addspl */ 1174 xen_psm_delspl, /* psm_delspl */ 1175 xen_psm_disable_intr, /* psm_disable_intr */ 1176 xen_psm_enable_intr, /* psm_enable_intr */ 1177 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1178 (void (*)(int))NULL, /* psm_set_softintr */ 1179 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1180 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1181 1182 xen_psm_clkinit, /* psm_clkinit */ 1183 xen_psm_get_clockirq, /* psm_get_clockirq */ 1184 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1185 xpv_gethrtime, /* psm_gethrtime */ 1186 1187 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1188 xen_psm_cpu_start, /* psm_cpu_start */ 1189 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1190 xen_psm_shutdown, /* psm_shutdown */ 1191 xen_psm_get_ipivect, /* psm_get_ipivect */ 1192 xen_psm_send_ipi, /* psm_send_ipi */ 1193 1194 xen_psm_translate_irq, /* psm_translate_irq */ 1195 1196 (void (*)(int, char *))NULL, /* psm_notify_error */ 1197 (void (*)(int msg))NULL, /* psm_notify_func */ 1198 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1199 xen_psm_timer_enable, /* psm_timer_enable */ 1200 xen_psm_timer_disable, /* psm_timer_disable */ 1201 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1202 (void (*)(int, int))NULL, /* psm_preshutdown */ 1203 xen_intr_ops /* Advanced DDI Interrupt framework */ 1204 }; 1205 1206 static struct psm_info xen_psm_info = { 1207 PSM_INFO_VER01_5, /* version */ 1208 PSM_OWN_EXCLUSIVE, /* ownership */ 1209 &xen_psm_ops, /* operation */ 1210 "xVM_psm", /* machine name */ 1211 "platform module %I%" /* machine descriptions */ 1212 }; 1213 1214 static void *xen_psm_hdlp; 1215 1216 int 1217 _init(void) 1218 { 1219 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1220 } 1221 1222 int 1223 _fini(void) 1224 { 1225 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1226 } 1227 1228 int 1229 _info(struct modinfo *modinfop) 1230 { 1231 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1232 } 1233