1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #define PSMI_1_5 30 31 #include <sys/mutex.h> 32 #include <sys/types.h> 33 #include <sys/time.h> 34 #include <sys/clock.h> 35 #include <sys/machlock.h> 36 #include <sys/smp_impldefs.h> 37 #include <sys/uadmin.h> 38 #include <sys/promif.h> 39 #include <sys/psm.h> 40 #include <sys/psm_common.h> 41 #include <sys/atomic.h> 42 #include <sys/apic.h> 43 #include <sys/archsystm.h> 44 #include <sys/mach_intr.h> 45 #include <sys/hypervisor.h> 46 #include <sys/evtchn_impl.h> 47 #include <sys/modctl.h> 48 #include <sys/trap.h> 49 #include <sys/panic.h> 50 51 #include <xen/public/vcpu.h> 52 #include <xen/public/physdev.h> 53 54 55 /* 56 * Global Data 57 */ 58 59 int xen_psm_verbose = 0; 60 61 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 62 int apic_error = 0; 63 int apic_verbose = 0; 64 cpuset_t apic_cpumask; 65 int apic_forceload = 0; 66 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 67 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 68 }; 69 uchar_t apic_ipltopri[MAXIPL + 1]; 70 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 71 uint_t apic_picinit_called; 72 apic_cpus_info_t *apic_cpus; 73 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 74 /* use to make sure only one cpu handles the nmi */ 75 static lock_t xen_psm_nmi_lock; 76 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 77 int xen_psm_panic_on_nmi = 0; 78 int xen_psm_num_nmis = 0; 79 80 cpuset_t xen_psm_cpus_online; /* online cpus */ 81 int xen_psm_ncpus = 1; /* cpu count */ 82 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 83 84 /* 85 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't 86 * support MSI at all. Change this initialization to zero when MSI is 87 * supported. 88 */ 89 int xen_support_msi = -1; 90 91 static int xen_clock_irq = INVALID_IRQ; 92 93 /* flag definitions for xen_psm_verbose */ 94 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 95 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 96 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 97 98 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 99 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 100 cmn_err fmt; 101 102 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 103 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 104 prom_printf fmt; 105 106 /* 107 * Dummy apic array to point common routines at that want to do some apic 108 * manipulation. Xen doesn't allow guest apic access so we point at these 109 * memory locations to fake out those who want to do apic fiddling. 110 */ 111 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 112 113 static struct psm_info xen_psm_info; 114 static void xen_psm_setspl(int); 115 116 static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int); 117 118 /* 119 * Local support routines 120 */ 121 122 /* 123 * Select vcpu to bind xen virtual device interrupt to. 124 */ 125 /*ARGSUSED*/ 126 int 127 xen_psm_bind_intr(int irq) 128 { 129 int bind_cpu, test_cpu; 130 apic_irq_t *irqptr; 131 132 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 133 return (IRQ_UNBOUND); 134 if (irq <= APIC_MAX_VECTOR) 135 irqptr = apic_irq_table[irq]; 136 else 137 irqptr = NULL; 138 if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) { 139 bind_cpu = irqptr->airq_cpu; 140 test_cpu = bind_cpu & ~IRQ_USER_BOUND; 141 if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu)) 142 bind_cpu = 0; 143 goto done; 144 } 145 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 146 do { 147 bind_cpu = xen_psm_next_bind_cpu++; 148 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 149 xen_psm_next_bind_cpu = 0; 150 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 151 } else { 152 bind_cpu = 0; 153 } 154 done: 155 return (bind_cpu); 156 } 157 158 /* 159 * Autoconfiguration Routines 160 */ 161 162 static int 163 xen_psm_probe(void) 164 { 165 int ret = PSM_SUCCESS; 166 167 if (DOMAIN_IS_INITDOMAIN(xen_info)) 168 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 169 return (ret); 170 } 171 172 static void 173 xen_psm_softinit(void) 174 { 175 /* LINTED logical expression always true: op "||" */ 176 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 177 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); 178 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 179 apic_init_common(); 180 } 181 } 182 183 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 184 185 /*ARGSUSED*/ 186 static int 187 xen_psm_clkinit(int hertz) 188 { 189 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 190 extern int dosynctodr; 191 192 /* 193 * domU cannot set the TOD hardware, fault the TOD clock now to 194 * indicate that and turn off attempts to sync TOD hardware 195 * with the hires timer. 196 */ 197 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 198 mutex_enter(&tod_lock); 199 (void) tod_fault(TOD_RDONLY, 0); 200 dosynctodr = 0; 201 mutex_exit(&tod_lock); 202 } 203 /* 204 * The hypervisor provides a timer based on the local APIC timer. 205 * The interface supports requests of nanosecond resolution. 206 * A common frequency of the apic clock is 100 Mhz which 207 * gives a resolution of 10 nsec per tick. What we would really like 208 * is a way to get the ns per tick value from xen. 209 * XXPV - This is an assumption that needs checking and may change 210 */ 211 return (XEN_NSEC_PER_TICK); 212 } 213 214 static void 215 xen_psm_hrtimeinit(void) 216 { 217 extern int gethrtime_hires; 218 gethrtime_hires = 1; 219 } 220 221 /* xen_psm NMI handler */ 222 /*ARGSUSED*/ 223 static void 224 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 225 { 226 xen_psm_num_nmis++; 227 228 if (!lock_try(&xen_psm_nmi_lock)) 229 return; 230 231 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 232 debug_enter("NMI received: entering kmdb\n"); 233 } else if (xen_psm_panic_on_nmi) { 234 /* Keep panic from entering kmdb. */ 235 nopanicdebug = 1; 236 panic("NMI received\n"); 237 } else { 238 /* 239 * prom_printf is the best shot we have of something which is 240 * problem free from high level/NMI type of interrupts 241 */ 242 prom_printf("NMI received\n"); 243 } 244 245 lock_clear(&xen_psm_nmi_lock); 246 } 247 248 static void 249 xen_psm_picinit() 250 { 251 int cpu, irqno; 252 cpuset_t cpus; 253 254 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 255 /* set a flag so we know we have run xen_psm_picinit() */ 256 apic_picinit_called = 1; 257 LOCK_INIT_CLEAR(&apic_ioapic_lock); 258 259 /* XXPV - do we need to do this? */ 260 picsetup(); /* initialise the 8259 */ 261 262 /* enable apic mode if imcr present */ 263 /* XXPV - do we need to do this either? */ 264 if (apic_imcrp) { 265 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 266 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 267 } 268 269 ioapic_init_intr(IOAPIC_NOMASK); 270 /* 271 * We never called xen_psm_addspl() when the SCI 272 * interrupt was added because that happened before the 273 * PSM module was loaded. Fix that up here by doing 274 * any missed operations (e.g. bind to CPU) 275 */ 276 if ((irqno = apic_sci_vect) > 0) { 277 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 278 CPUSET_ZERO(cpus); 279 CPUSET_OR(cpus, xen_psm_cpus_online); 280 } else { 281 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 282 } 283 ec_set_irq_affinity(irqno, cpus); 284 apic_irq_table[irqno]->airq_temp_cpu = 285 (uchar_t)(cpu & ~IRQ_USER_BOUND); 286 ec_enable_irq(irqno); 287 } 288 } 289 290 /* add nmi handler - least priority nmi handler */ 291 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 292 293 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 294 "xVM_psm NMI handler", (caddr_t)NULL)) 295 cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); 296 } 297 298 299 /* 300 * generates an interprocessor interrupt to another CPU 301 */ 302 static void 303 xen_psm_send_ipi(int cpun, int ipl) 304 { 305 ulong_t flag = intr_clear(); 306 307 ec_send_ipi(ipl, cpun); 308 intr_restore(flag); 309 } 310 311 /*ARGSUSED*/ 312 static int 313 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 314 { 315 int cpu, ret; 316 cpuset_t cpus; 317 318 /* 319 * We are called at splhi() so we can't call anything that might end 320 * up trying to context switch. 321 */ 322 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 323 DOMAIN_IS_INITDOMAIN(xen_info)) { 324 /* 325 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 326 */ 327 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 328 } else { 329 /* 330 * Set priority/affinity/enable for non PIRQs 331 */ 332 ret = ec_set_irq_priority(irqno, ipl); 333 ASSERT(ret == 0); 334 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 335 CPUSET_ZERO(cpus); 336 CPUSET_OR(cpus, xen_psm_cpus_online); 337 } else { 338 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 339 } 340 ec_set_irq_affinity(irqno, cpus); 341 ec_enable_irq(irqno); 342 } 343 return (ret); 344 } 345 346 /* 347 * Acquire ownership of this irq on this cpu 348 */ 349 void 350 xen_psm_acquire_irq(int irq) 351 { 352 ulong_t flags; 353 int cpuid; 354 355 /* 356 * If the irq is currently being serviced by another cpu 357 * we busy-wait for the other cpu to finish. Take any 358 * pending interrupts before retrying. 359 */ 360 do { 361 flags = intr_clear(); 362 cpuid = ec_block_irq(irq); 363 intr_restore(flags); 364 } while (cpuid != CPU->cpu_id); 365 } 366 367 /*ARGSUSED*/ 368 static int 369 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 370 { 371 apic_irq_t *irqptr; 372 int err = PSM_SUCCESS; 373 374 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 375 DOMAIN_IS_INITDOMAIN(xen_info)) { 376 irqptr = apic_irq_table[irqno]; 377 /* 378 * unbind if no more sharers of this irq/evtchn 379 */ 380 if (irqptr->airq_share == 1) { 381 xen_psm_acquire_irq(irqno); 382 ec_unbind_irq(irqno); 383 } 384 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 385 /* 386 * If still in use reset priority 387 */ 388 if (!err && irqptr->airq_share != 0) { 389 err = ec_set_irq_priority(irqno, max_ipl); 390 return (err); 391 } 392 } else { 393 xen_psm_acquire_irq(irqno); 394 ec_unbind_irq(irqno); 395 } 396 return (err); 397 } 398 399 static processorid_t 400 xen_psm_get_next_processorid(processorid_t id) 401 { 402 if (id == -1) 403 return (0); 404 405 for (id++; id < NCPU; id++) { 406 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 407 case 0: /* yeah, that one's there */ 408 return (id); 409 default: 410 case X_EINVAL: /* out of range */ 411 return (-1); 412 case X_ENOENT: /* not present in the domain */ 413 /* 414 * It's not clear that we -need- to keep looking 415 * at this point, if, e.g., we can guarantee 416 * the hypervisor always keeps a contiguous range 417 * of vcpus around this is equivalent to "out of range". 418 * 419 * But it would be sad to miss a vcpu we're 420 * supposed to be using .. 421 */ 422 break; 423 } 424 } 425 426 return (-1); 427 } 428 429 /* 430 * XXPV - undo the start cpu op change; return to ignoring this value 431 * - also tweak error handling in main startup loop 432 */ 433 /*ARGSUSED*/ 434 static int 435 xen_psm_cpu_start(processorid_t id, caddr_t arg) 436 { 437 int ret; 438 439 ASSERT(id > 0); 440 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); 441 ec_bind_cpu_ipis(id); 442 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 443 if ((ret = xen_vcpu_up(id)) == 0) 444 xen_psm_ncpus++; 445 else 446 ret = EINVAL; 447 return (ret); 448 } 449 450 /* 451 * Allocate an irq for inter cpu signaling 452 */ 453 /*ARGSUSED*/ 454 static int 455 xen_psm_get_ipivect(int ipl, int type) 456 { 457 return (ec_bind_ipi_to_irq(ipl, 0)); 458 } 459 460 /*ARGSUSED*/ 461 static int 462 xen_psm_get_clockirq(int ipl) 463 { 464 if (xen_clock_irq != INVALID_IRQ) 465 return (xen_clock_irq); 466 467 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 468 return (xen_clock_irq); 469 } 470 471 /*ARGSUSED*/ 472 static void 473 xen_psm_shutdown(int cmd, int fcn) 474 { 475 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 476 477 switch (cmd) { 478 case A_SHUTDOWN: 479 switch (fcn) { 480 case AD_BOOT: 481 case AD_IBOOT: 482 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 483 break; 484 case AD_POWEROFF: 485 /* fall through if domU or if poweroff fails */ 486 if (DOMAIN_IS_INITDOMAIN(xen_info)) 487 if (apic_enable_acpi) 488 (void) acpi_poweroff(); 489 /* FALLTHRU */ 490 case AD_HALT: 491 default: 492 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 493 break; 494 } 495 break; 496 case A_REBOOT: 497 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 498 break; 499 default: 500 return; 501 } 502 } 503 504 505 static int 506 xen_psm_translate_irq(dev_info_t *dip, int irqno) 507 { 508 if (dip == NULL) { 509 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 510 " dip = NULL\n", irqno)); 511 return (irqno); 512 } 513 return (irqno); 514 } 515 516 /* 517 * xen_psm_intr_enter() acks the event that triggered the interrupt and 518 * returns the new priority level, 519 */ 520 /*ARGSUSED*/ 521 static int 522 xen_psm_intr_enter(int ipl, int *vector) 523 { 524 int newipl; 525 uint_t intno; 526 cpu_t *cpu = CPU; 527 528 intno = (*vector); 529 530 ASSERT(intno < NR_IRQS); 531 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 532 533 ec_clear_irq(intno); 534 535 newipl = autovect[intno].avh_hi_pri; 536 if (newipl == 0) { 537 /* 538 * (newipl == 0) means we have no service routines for this 539 * vector. We will treat this as a spurious interrupt. 540 * We have cleared the pending bit already, clear the event 541 * mask and return a spurious interrupt. This case can happen 542 * when an interrupt delivery is racing with the removal of 543 * of the service routine for that interrupt. 544 */ 545 ec_unmask_irq(intno); 546 newipl = -1; /* flag spurious interrupt */ 547 } else if (newipl <= cpu->cpu_pri) { 548 /* 549 * (newipl <= cpu->cpu_pri) means that we must be trying to 550 * service a vector that was shared with a higher priority 551 * isr. The higher priority handler has been removed and 552 * we need to service this int. We can't return a lower 553 * priority than current cpu priority. Just synthesize a 554 * priority to return that should be acceptable. 555 */ 556 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 557 } 558 return (newipl); 559 } 560 561 562 /* 563 * xen_psm_intr_exit() restores the old interrupt 564 * priority level after processing an interrupt. 565 * It is called with interrupts disabled, and does not enable interrupts. 566 */ 567 /* ARGSUSED */ 568 static void 569 xen_psm_intr_exit(int ipl, int vector) 570 { 571 ec_try_unmask_irq(vector); 572 xen_psm_setspl(ipl); 573 } 574 575 intr_exit_fn_t 576 psm_intr_exit_fn(void) 577 { 578 return (xen_psm_intr_exit); 579 } 580 581 /* 582 * Check if new ipl level allows delivery of previously unserviced events 583 */ 584 static void 585 xen_psm_setspl(int ipl) 586 { 587 struct cpu *cpu = CPU; 588 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 589 uint16_t pending; 590 591 ASSERT(vci->evtchn_upcall_mask != 0); 592 593 /* 594 * If new ipl level will enable any pending interrupts, setup so the 595 * upcoming sti will cause us to get an upcall. 596 */ 597 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 598 if (pending) { 599 int i; 600 ulong_t pending_sels = 0; 601 volatile ulong_t *selp; 602 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 603 604 for (i = bsrw_insn(pending); i > ipl; i--) 605 pending_sels |= cpe->pending_sel[i]; 606 ASSERT(pending_sels); 607 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 608 atomic_or_ulong(selp, pending_sels); 609 vci->evtchn_upcall_pending = 1; 610 } 611 } 612 613 /* 614 * This function provides external interface to the nexus for all 615 * functionality related to the new DDI interrupt framework. 616 * 617 * Input: 618 * dip - pointer to the dev_info structure of the requested device 619 * hdlp - pointer to the internal interrupt handle structure for the 620 * requested interrupt 621 * intr_op - opcode for this call 622 * result - pointer to the integer that will hold the result to be 623 * passed back if return value is PSM_SUCCESS 624 * 625 * Output: 626 * return value is either PSM_SUCCESS or PSM_FAILURE 627 */ 628 int 629 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 630 psm_intr_op_t intr_op, int *result) 631 { 632 int cap; 633 int err; 634 int new_priority; 635 apic_irq_t *irqp; 636 struct intrspec *ispec; 637 638 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 639 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 640 641 switch (intr_op) { 642 case PSM_INTR_OP_CHECK_MSI: 643 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 644 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 645 DDI_INTR_TYPE_MSIX); 646 break; 647 } 648 /* 649 * Check MSI/X is supported or not at APIC level and 650 * masked off the MSI/X bits in hdlp->ih_type if not 651 * supported before return. If MSI/X is supported, 652 * leave the ih_type unchanged and return. 653 * 654 * hdlp->ih_type passed in from the nexus has all the 655 * interrupt types supported by the device. 656 */ 657 if (xen_support_msi == 0) { 658 /* 659 * if xen_support_msi is not set, call 660 * apic_check_msi_support() to check whether msi 661 * is supported first 662 */ 663 if (apic_check_msi_support() == PSM_SUCCESS) 664 xen_support_msi = 1; 665 else 666 xen_support_msi = -1; 667 } 668 if (xen_support_msi == 1) 669 *result = hdlp->ih_type; 670 else 671 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 672 DDI_INTR_TYPE_MSIX); 673 break; 674 case PSM_INTR_OP_ALLOC_VECTORS: 675 *result = apic_alloc_vectors(dip, hdlp->ih_inum, 676 hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type, 677 (int)(uintptr_t)hdlp->ih_scratch2); 678 break; 679 case PSM_INTR_OP_FREE_VECTORS: 680 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 681 hdlp->ih_pri, hdlp->ih_type); 682 break; 683 case PSM_INTR_OP_NAVAIL_VECTORS: 684 /* 685 * XXPV - maybe we should make this be: 686 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 687 */ 688 if (DOMAIN_IS_INITDOMAIN(xen_info)) 689 *result = APIC_VECTOR_PER_IPL; 690 else 691 *result = 1; 692 break; 693 case PSM_INTR_OP_XLATE_VECTOR: 694 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 695 if (ispec->intrspec_vec >= PIRQ_BASE && 696 ispec->intrspec_vec < NR_PIRQS && 697 DOMAIN_IS_INITDOMAIN(xen_info)) { 698 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 699 } else { 700 *result = ispec->intrspec_vec; 701 } 702 break; 703 case PSM_INTR_OP_GET_PENDING: 704 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 705 *result = ec_pending_irq(hdlp->ih_vector); 706 break; 707 case PSM_INTR_OP_CLEAR_MASK: 708 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 709 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 710 return (PSM_FAILURE); 711 ec_enable_irq(hdlp->ih_vector); 712 break; 713 case PSM_INTR_OP_SET_MASK: 714 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 715 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 716 return (PSM_FAILURE); 717 ec_disable_irq(hdlp->ih_vector); 718 break; 719 case PSM_INTR_OP_GET_CAP: 720 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 721 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 722 cap |= DDI_INTR_FLAG_MASKABLE; 723 *result = cap; 724 break; 725 case PSM_INTR_OP_GET_SHARED: 726 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 727 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 728 return (PSM_FAILURE); 729 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 730 == NULL) 731 return (PSM_FAILURE); 732 *result = irqp->airq_share ? 1: 0; 733 } else { 734 return (PSM_FAILURE); 735 } 736 break; 737 case PSM_INTR_OP_SET_PRI: 738 new_priority = *(int *)result; 739 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 740 if (err != 0) 741 return (PSM_FAILURE); 742 break; 743 case PSM_INTR_OP_GET_INTR: 744 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 745 return (PSM_FAILURE); 746 /* 747 * The interrupt handle given here has been allocated 748 * specifically for this command, and ih_private carries 749 * a pointer to a apic_get_intr_t. 750 */ 751 if (apic_get_vector_intr_info( 752 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 753 return (PSM_FAILURE); 754 break; 755 case PSM_INTR_OP_SET_CAP: 756 /* FALLTHRU */ 757 default: 758 return (PSM_FAILURE); 759 } 760 return (PSM_SUCCESS); 761 } 762 763 static void 764 xen_psm_rebind_irq(int irq) 765 { 766 cpuset_t ncpu; 767 processorid_t newcpu; 768 apic_irq_t *irqptr; 769 770 newcpu = xen_psm_bind_intr(irq); 771 if (newcpu == IRQ_UNBOUND) { 772 CPUSET_ZERO(ncpu); 773 CPUSET_OR(ncpu, xen_psm_cpus_online); 774 } else { 775 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 776 } 777 ec_set_irq_affinity(irq, ncpu); 778 irqptr = apic_irq_table[irq]; 779 ASSERT(irqptr != NULL); 780 irqptr->airq_temp_cpu = (uchar_t)newcpu; 781 } 782 783 /* 784 * Disable all device interrupts for the given cpu. 785 * High priority interrupts are not disabled and will still be serviced. 786 */ 787 static int 788 xen_psm_disable_intr(processorid_t cpun) 789 { 790 int irq; 791 792 /* 793 * Can't offline VCPU 0 on this hypervisor. There's no reason 794 * anyone would want to given that the CPUs are virtual. Also note 795 * that the hypervisor requires suspend/resume to be on VCPU 0. 796 */ 797 if (cpun == 0) 798 return (PSM_FAILURE); 799 800 CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); 801 for (irq = 0; irq < NR_IRQS; irq++) { 802 if (!ec_irq_needs_rebind(irq, cpun)) 803 continue; 804 xen_psm_rebind_irq(irq); 805 } 806 return (PSM_SUCCESS); 807 } 808 809 static void 810 xen_psm_enable_intr(processorid_t cpun) 811 { 812 int irq; 813 814 if (cpun == 0) 815 return; 816 817 CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); 818 819 /* 820 * Rebalance device interrupts among online processors 821 */ 822 for (irq = 0; irq < NR_IRQS; irq++) { 823 if (!ec_irq_rebindable(irq)) 824 continue; 825 xen_psm_rebind_irq(irq); 826 } 827 } 828 829 static int 830 xen_psm_post_cpu_start() 831 { 832 processorid_t cpun; 833 834 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 835 cpun = psm_get_cpu_id(); 836 apic_cpus[cpun].aci_status = 837 APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 838 } 839 /* 840 * Re-distribute interrupts to include the newly added cpu. 841 */ 842 xen_psm_enable_intr(cpun); 843 return (PSM_SUCCESS); 844 } 845 846 /* 847 * This function will reprogram the timer. 848 * 849 * When in oneshot mode the argument is the absolute time in future at which to 850 * generate the interrupt. 851 * 852 * When in periodic mode, the argument is the interval at which the 853 * interrupts should be generated. There is no need to support the periodic 854 * mode timer change at this time. 855 * 856 * Note that we must be careful to convert from hrtime to Xen system time (see 857 * xpv_timestamp.c). 858 */ 859 static void 860 xen_psm_timer_reprogram(hrtime_t timer_req) 861 { 862 hrtime_t now, timer_new, time_delta, xen_time; 863 ulong_t flags; 864 865 flags = intr_clear(); 866 /* 867 * We should be called from high PIL context (CBE_HIGH_PIL), 868 * so kpreempt is disabled. 869 */ 870 871 now = xpv_gethrtime(); 872 xen_time = xpv_getsystime(); 873 if (timer_req <= now) { 874 /* 875 * requested to generate an interrupt in the past 876 * generate an interrupt as soon as possible 877 */ 878 time_delta = XEN_NSEC_PER_TICK; 879 } else 880 time_delta = timer_req - now; 881 882 timer_new = xen_time + time_delta; 883 if (HYPERVISOR_set_timer_op(timer_new) != 0) 884 panic("can't set hypervisor timer?"); 885 intr_restore(flags); 886 } 887 888 /* 889 * This function will enable timer interrupts. 890 */ 891 static void 892 xen_psm_timer_enable(void) 893 { 894 ec_unmask_irq(xen_clock_irq); 895 } 896 897 /* 898 * This function will disable timer interrupts on the current cpu. 899 */ 900 static void 901 xen_psm_timer_disable(void) 902 { 903 (void) ec_block_irq(xen_clock_irq); 904 /* 905 * If the clock irq is pending on this cpu then we need to 906 * clear the pending interrupt. 907 */ 908 ec_unpend_irq(xen_clock_irq); 909 } 910 911 /* 912 * 913 * The following functions are in the platform specific file so that they 914 * can be different functions depending on whether we are running on 915 * bare metal or a hypervisor. 916 */ 917 918 /* 919 * Allocate a free vector for irq at ipl. 920 */ 921 /* ARGSUSED */ 922 uchar_t 923 apic_allocate_vector(int ipl, int irq, int pri) 924 { 925 physdev_irq_t irq_op; 926 uchar_t vector; 927 928 irq_op.irq = irq; 929 930 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 931 panic("Hypervisor alloc vector failed"); 932 vector = irq_op.vector; 933 /* 934 * No need to worry about vector colliding with our reserved vectors 935 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 936 * generated traps and handle them properly. 937 */ 938 apic_vector_to_irq[vector] = (uchar_t)irq; 939 return (vector); 940 } 941 942 /* Mark vector as not being used by any irq */ 943 void 944 apic_free_vector(uchar_t vector) 945 { 946 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 947 } 948 949 /* 950 * This function allocate "count" vector(s) for the given "dip/pri/type" 951 */ 952 static int 953 apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, 954 int behavior) 955 { 956 int rcount, i; 957 uchar_t vector, cpu; 958 int irqno; 959 major_t major; 960 apic_irq_t *irqptr; 961 962 /* only supports MSI at the moment, will add MSI-X support later */ 963 if (type != DDI_INTR_TYPE_MSI) 964 return (0); 965 966 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d " 967 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 968 (void *)dip, type, inum, pri, count, behavior)); 969 970 if (count > 1) { 971 if (behavior == DDI_INTR_ALLOC_STRICT && 972 (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) 973 return (0); 974 975 if (apic_multi_msi_enable == 0) 976 count = 1; 977 else if (count > apic_multi_msi_max) 978 count = apic_multi_msi_max; 979 } 980 981 /* 982 * XXPV - metal version takes all vectors avail at given pri. 983 * Why do that? For now just allocate count vectors. 984 */ 985 rcount = count; 986 987 mutex_enter(&airq_mutex); 988 989 /* 990 * XXPV - currently the hypervisor does not support MSI at all. 991 * It doesn't return consecutive vectors. This code is a first 992 * cut for the (future) time that MSI is supported. 993 */ 994 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 995 for (i = 0; i < rcount; i++) { 996 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == 997 INVALID_IRQ) { 998 mutex_exit(&airq_mutex); 999 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1000 "apic_allocate_irq failed\n")); 1001 return (i); 1002 } 1003 apic_max_device_irq = max(irqno, apic_max_device_irq); 1004 apic_min_device_irq = min(irqno, apic_min_device_irq); 1005 irqptr = apic_irq_table[irqno]; 1006 vector = apic_allocate_vector(pri, irqno, 0); 1007 apic_vector_to_irq[vector] = (uchar_t)irqno; 1008 #ifdef DEBUG 1009 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1010 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1011 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1012 #endif 1013 1014 irqptr->airq_vector = vector; 1015 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1016 irqptr->airq_intin_no = (uchar_t)rcount; 1017 irqptr->airq_ipl = pri; 1018 irqptr->airq_origirq = (uchar_t)(inum + i); 1019 irqptr->airq_share_id = 0; 1020 irqptr->airq_mps_intr_index = MSI_INDEX; 1021 irqptr->airq_dip = dip; 1022 irqptr->airq_major = major; 1023 if (i == 0) /* they all bound to the same cpu */ 1024 cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno, 1025 0xff, 0xff); 1026 else 1027 irqptr->airq_cpu = cpu; 1028 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x " 1029 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1030 (void *)irqptr->airq_dip, irqptr->airq_vector, 1031 irqptr->airq_origirq, pri)); 1032 } 1033 mutex_exit(&airq_mutex); 1034 return (rcount); 1035 } 1036 1037 /* 1038 * The hypervisor doesn't permit access to local apics directly 1039 */ 1040 /* ARGSUSED */ 1041 uint32_t * 1042 mapin_apic(uint32_t addr, size_t len, int flags) 1043 { 1044 /* 1045 * Return a pointer to a memory area to fake out the 1046 * probe code that wants to read apic registers. 1047 * The dummy values will end up being ignored by xen 1048 * later on when they are used anyway. 1049 */ 1050 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1051 return (xen_psm_dummy_apic); 1052 } 1053 1054 /* ARGSUSED */ 1055 uint32_t * 1056 mapin_ioapic(uint32_t addr, size_t len, int flags) 1057 { 1058 /* 1059 * Return non-null here to fake out configure code that calls this. 1060 * The i86xpv platform will not reference through the returned value.. 1061 */ 1062 return ((uint32_t *)0x1); 1063 } 1064 1065 /* ARGSUSED */ 1066 void 1067 mapout_apic(caddr_t addr, size_t len) 1068 { 1069 } 1070 1071 /* ARGSUSED */ 1072 void 1073 mapout_ioapic(caddr_t addr, size_t len) 1074 { 1075 } 1076 1077 uint32_t 1078 ioapic_read(int apic_ix, uint32_t reg) 1079 { 1080 physdev_apic_t apic; 1081 1082 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1083 apic.reg = reg; 1084 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1085 panic("read ioapic %d reg %d failed", apic_ix, reg); 1086 return (apic.value); 1087 } 1088 1089 void 1090 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1091 { 1092 physdev_apic_t apic; 1093 1094 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1095 apic.reg = reg; 1096 apic.value = value; 1097 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1098 panic("write ioapic %d reg %d failed", apic_ix, reg); 1099 } 1100 1101 /* 1102 * Call rebind to do the actual programming. 1103 */ 1104 int 1105 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1106 { 1107 apic_irq_t *irqptr; 1108 struct ioapic_reprogram_data *drep = NULL; 1109 int rv, cpu; 1110 cpuset_t cpus; 1111 1112 /* 1113 * Set cpu based on xen idea of online cpu's not apic tables. 1114 * Note that xen ignores/sets to it's own preferred value the 1115 * target cpu field when programming ioapic anyway. 1116 */ 1117 if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) { 1118 CPUSET_ZERO(cpus); 1119 CPUSET_OR(cpus, xen_psm_cpus_online); 1120 } else { 1121 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1122 } 1123 apic_irq_table[irq]->airq_cpu = cpu; 1124 if (deferred) { 1125 drep = (struct ioapic_reprogram_data *)p; 1126 ASSERT(drep != NULL); 1127 irqptr = drep->irqp; 1128 } else { 1129 irqptr = (apic_irq_t *)p; 1130 } 1131 ASSERT(irqptr != NULL); 1132 rv = apic_rebind(irqptr, cpu, drep); 1133 if (rv) { 1134 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1135 cpu = 0; 1136 rv = apic_rebind(irqptr, cpu, drep); 1137 } 1138 /* 1139 * If rebind successful bind the irq to an event channel 1140 */ 1141 if (rv == 0) { 1142 ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); 1143 CPUSET_FIND(cpus, cpu); 1144 apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; 1145 } 1146 return (rv); 1147 } 1148 1149 /* 1150 * Allocate a new vector for the given irq 1151 */ 1152 /* ARGSUSED */ 1153 uchar_t 1154 apic_modify_vector(uchar_t vector, int irq) 1155 { 1156 return (apic_allocate_vector(0, irq, 0)); 1157 } 1158 1159 /* 1160 * The rest of the file is just generic psm module boilerplate 1161 */ 1162 1163 static struct psm_ops xen_psm_ops = { 1164 xen_psm_probe, /* psm_probe */ 1165 1166 xen_psm_softinit, /* psm_init */ 1167 xen_psm_picinit, /* psm_picinit */ 1168 xen_psm_intr_enter, /* psm_intr_enter */ 1169 xen_psm_intr_exit, /* psm_intr_exit */ 1170 xen_psm_setspl, /* psm_setspl */ 1171 xen_psm_addspl, /* psm_addspl */ 1172 xen_psm_delspl, /* psm_delspl */ 1173 xen_psm_disable_intr, /* psm_disable_intr */ 1174 xen_psm_enable_intr, /* psm_enable_intr */ 1175 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1176 (void (*)(int))NULL, /* psm_set_softintr */ 1177 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1178 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1179 1180 xen_psm_clkinit, /* psm_clkinit */ 1181 xen_psm_get_clockirq, /* psm_get_clockirq */ 1182 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1183 xpv_gethrtime, /* psm_gethrtime */ 1184 1185 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1186 xen_psm_cpu_start, /* psm_cpu_start */ 1187 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1188 xen_psm_shutdown, /* psm_shutdown */ 1189 xen_psm_get_ipivect, /* psm_get_ipivect */ 1190 xen_psm_send_ipi, /* psm_send_ipi */ 1191 1192 xen_psm_translate_irq, /* psm_translate_irq */ 1193 1194 (void (*)(int, char *))NULL, /* psm_notify_error */ 1195 (void (*)(int msg))NULL, /* psm_notify_func */ 1196 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1197 xen_psm_timer_enable, /* psm_timer_enable */ 1198 xen_psm_timer_disable, /* psm_timer_disable */ 1199 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1200 (void (*)(int, int))NULL, /* psm_preshutdown */ 1201 xen_intr_ops /* Advanced DDI Interrupt framework */ 1202 }; 1203 1204 static struct psm_info xen_psm_info = { 1205 PSM_INFO_VER01_5, /* version */ 1206 PSM_OWN_SYS_DEFAULT, /* ownership */ 1207 &xen_psm_ops, /* operation */ 1208 "xVM_psm", /* machine name */ 1209 "platform module %I%" /* machine descriptions */ 1210 }; 1211 1212 static void *xen_psm_hdlp; 1213 1214 int 1215 _init(void) 1216 { 1217 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1218 } 1219 1220 int 1221 _fini(void) 1222 { 1223 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1224 } 1225 1226 int 1227 _info(struct modinfo *modinfop) 1228 { 1229 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1230 } 1231