1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #define PSMI_1_5 30 31 #include <sys/mutex.h> 32 #include <sys/types.h> 33 #include <sys/time.h> 34 #include <sys/clock.h> 35 #include <sys/machlock.h> 36 #include <sys/smp_impldefs.h> 37 #include <sys/uadmin.h> 38 #include <sys/promif.h> 39 #include <sys/psm.h> 40 #include <sys/psm_common.h> 41 #include <sys/atomic.h> 42 #include <sys/apic.h> 43 #include <sys/archsystm.h> 44 #include <sys/mach_intr.h> 45 #include <sys/hypervisor.h> 46 #include <sys/evtchn_impl.h> 47 #include <sys/modctl.h> 48 #include <sys/trap.h> 49 #include <sys/panic.h> 50 51 #include <xen/public/vcpu.h> 52 #include <xen/public/physdev.h> 53 54 55 /* 56 * Global Data 57 */ 58 59 int xen_psm_verbose = 0; 60 61 volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ 62 int apic_error = 0; 63 int apic_verbose = 0; 64 cpuset_t apic_cpumask; 65 int apic_forceload = 0; 66 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 67 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 68 }; 69 uchar_t apic_ipltopri[MAXIPL + 1]; 70 uchar_t apic_ipls[APIC_AVAIL_VECTOR]; 71 uint_t apic_picinit_called; 72 apic_cpus_info_t *apic_cpus; 73 int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 74 /* use to make sure only one cpu handles the nmi */ 75 static lock_t xen_psm_nmi_lock; 76 int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 77 int xen_psm_panic_on_nmi = 0; 78 int xen_psm_num_nmis = 0; 79 80 cpuset_t xen_psm_cpus_online; /* online cpus */ 81 int xen_psm_ncpus = 1; /* cpu count */ 82 int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ 83 84 /* 85 * XXPV we flag MSI as not supported, since the hypervisor currently doesn't 86 * support MSI at all. Change this initialization to zero when MSI is 87 * supported. 88 */ 89 int xen_support_msi = -1; 90 91 static int xen_clock_irq = INVALID_IRQ; 92 93 /* flag definitions for xen_psm_verbose */ 94 #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 95 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 96 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 97 98 #define XEN_PSM_VERBOSE_IRQ(fmt) \ 99 if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ 100 cmn_err fmt; 101 102 #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ 103 if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ 104 prom_printf fmt; 105 106 /* 107 * Dummy apic array to point common routines at that want to do some apic 108 * manipulation. Xen doesn't allow guest apic access so we point at these 109 * memory locations to fake out those who want to do apic fiddling. 110 */ 111 uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; 112 113 static struct psm_info xen_psm_info; 114 static void xen_psm_setspl(int); 115 116 static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int); 117 118 /* 119 * Local support routines 120 */ 121 122 /* 123 * Select vcpu to bind xen virtual device interrupt to. 124 */ 125 /*ARGSUSED*/ 126 int 127 xen_psm_bind_intr(int irq) 128 { 129 int bind_cpu, test_cpu; 130 apic_irq_t *irqptr; 131 132 if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) 133 return (IRQ_UNBOUND); 134 if (irq <= APIC_MAX_VECTOR) 135 irqptr = apic_irq_table[irq]; 136 else 137 irqptr = NULL; 138 if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) { 139 bind_cpu = irqptr->airq_cpu; 140 test_cpu = bind_cpu & ~IRQ_USER_BOUND; 141 if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu)) 142 bind_cpu = 0; 143 goto done; 144 } 145 if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 146 do { 147 bind_cpu = xen_psm_next_bind_cpu++; 148 if (xen_psm_next_bind_cpu >= xen_psm_ncpus) 149 xen_psm_next_bind_cpu = 0; 150 } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); 151 } else { 152 bind_cpu = 0; 153 } 154 done: 155 return (bind_cpu); 156 } 157 158 /* 159 * Autoconfiguration Routines 160 */ 161 162 static int 163 xen_psm_probe(void) 164 { 165 int ret = PSM_SUCCESS; 166 167 if (DOMAIN_IS_INITDOMAIN(xen_info)) 168 ret = apic_probe_common(xen_psm_info.p_mach_idstring); 169 return (ret); 170 } 171 172 static void 173 xen_psm_softinit(void) 174 { 175 /* LINTED logical expression always true: op "||" */ 176 ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); 177 CPUSET_ADD(xen_psm_cpus_online, 0); 178 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 179 apic_init_common(); 180 } 181 } 182 183 #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ 184 185 /*ARGSUSED*/ 186 static int 187 xen_psm_clkinit(int hertz) 188 { 189 extern enum tod_fault_type tod_fault(enum tod_fault_type, int); 190 extern int dosynctodr; 191 192 /* 193 * domU cannot set the TOD hardware, fault the TOD clock now to 194 * indicate that and turn off attempts to sync TOD hardware 195 * with the hires timer. 196 */ 197 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 198 mutex_enter(&tod_lock); 199 (void) tod_fault(TOD_RDONLY, 0); 200 dosynctodr = 0; 201 mutex_exit(&tod_lock); 202 } 203 /* 204 * The hypervisor provides a timer based on the local APIC timer. 205 * The interface supports requests of nanosecond resolution. 206 * A common frequency of the apic clock is 100 Mhz which 207 * gives a resolution of 10 nsec per tick. What we would really like 208 * is a way to get the ns per tick value from xen. 209 * XXPV - This is an assumption that needs checking and may change 210 */ 211 return (XEN_NSEC_PER_TICK); 212 } 213 214 static void 215 xen_psm_hrtimeinit(void) 216 { 217 extern int gethrtime_hires; 218 gethrtime_hires = 1; 219 } 220 221 /* xen_psm NMI handler */ 222 /*ARGSUSED*/ 223 static void 224 xen_psm_nmi_intr(caddr_t arg, struct regs *rp) 225 { 226 xen_psm_num_nmis++; 227 228 if (!lock_try(&xen_psm_nmi_lock)) 229 return; 230 231 if (xen_psm_kmdb_on_nmi && psm_debugger()) { 232 debug_enter("NMI received: entering kmdb\n"); 233 } else if (xen_psm_panic_on_nmi) { 234 /* Keep panic from entering kmdb. */ 235 nopanicdebug = 1; 236 panic("NMI received\n"); 237 } else { 238 /* 239 * prom_printf is the best shot we have of something which is 240 * problem free from high level/NMI type of interrupts 241 */ 242 prom_printf("NMI received\n"); 243 } 244 245 lock_clear(&xen_psm_nmi_lock); 246 } 247 248 static void 249 xen_psm_picinit() 250 { 251 int cpu, irqno; 252 cpuset_t cpus; 253 254 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 255 /* set a flag so we know we have run xen_psm_picinit() */ 256 apic_picinit_called = 1; 257 LOCK_INIT_CLEAR(&apic_ioapic_lock); 258 259 /* XXPV - do we need to do this? */ 260 picsetup(); /* initialise the 8259 */ 261 262 /* enable apic mode if imcr present */ 263 /* XXPV - do we need to do this either? */ 264 if (apic_imcrp) { 265 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 266 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 267 } 268 269 ioapic_init_intr(IOAPIC_NOMASK); 270 /* 271 * We never called xen_psm_addspl() when the SCI 272 * interrupt was added because that happened before the 273 * PSM module was loaded. Fix that up here by doing 274 * any missed operations (e.g. bind to CPU) 275 */ 276 if ((irqno = apic_sci_vect) > 0) { 277 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 278 CPUSET_ZERO(cpus); 279 CPUSET_OR(cpus, xen_psm_cpus_online); 280 } else { 281 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 282 } 283 ec_set_irq_affinity(irqno, cpus); 284 ec_enable_irq(irqno); 285 } 286 } 287 288 /* add nmi handler - least priority nmi handler */ 289 LOCK_INIT_CLEAR(&xen_psm_nmi_lock); 290 291 if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, 292 "xen_psm NMI handler", (caddr_t)NULL)) 293 cmn_err(CE_WARN, "xen_psm: Unable to add nmi handler"); 294 } 295 296 297 /* 298 * generates an interprocessor interrupt to another CPU 299 */ 300 static void 301 xen_psm_send_ipi(int cpun, int ipl) 302 { 303 ulong_t flag = intr_clear(); 304 305 ec_send_ipi(ipl, cpun); 306 intr_restore(flag); 307 } 308 309 /*ARGSUSED*/ 310 static int 311 xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 312 { 313 int cpu, ret; 314 cpuset_t cpus; 315 316 /* 317 * We are called at splhi() so we can't call anything that might end 318 * up trying to context switch. 319 */ 320 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 321 DOMAIN_IS_INITDOMAIN(xen_info)) { 322 /* 323 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() 324 */ 325 ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); 326 } else { 327 /* 328 * Set priority/affinity/enable for non PIRQs 329 */ 330 ret = ec_set_irq_priority(irqno, ipl); 331 ASSERT(ret == 0); 332 if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { 333 CPUSET_ZERO(cpus); 334 CPUSET_OR(cpus, xen_psm_cpus_online); 335 } else { 336 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 337 } 338 ec_set_irq_affinity(irqno, cpus); 339 ec_enable_irq(irqno); 340 } 341 return (ret); 342 } 343 344 /* 345 * Acquire ownership of this irq on this cpu 346 */ 347 void 348 xen_psm_acquire_irq(int irq) 349 { 350 ulong_t flags; 351 int cpuid; 352 353 /* 354 * If the irq is currently being serviced by another cpu 355 * we busy-wait for the other cpu to finish. Take any 356 * pending interrupts before retrying. 357 */ 358 do { 359 flags = intr_clear(); 360 cpuid = ec_block_irq(irq); 361 intr_restore(flags); 362 } while (cpuid != CPU->cpu_id); 363 } 364 365 /*ARGSUSED*/ 366 static int 367 xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 368 { 369 apic_irq_t *irqptr; 370 int err = PSM_SUCCESS; 371 372 if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && 373 DOMAIN_IS_INITDOMAIN(xen_info)) { 374 irqptr = apic_irq_table[irqno]; 375 /* 376 * unbind if no more sharers of this irq/evtchn 377 */ 378 if (irqptr->airq_share == 1) { 379 xen_psm_acquire_irq(irqno); 380 ec_unbind_irq(irqno); 381 } 382 err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); 383 /* 384 * If still in use reset priority 385 */ 386 if (!err && irqptr->airq_share != 0) { 387 err = ec_set_irq_priority(irqno, max_ipl); 388 return (err); 389 } 390 } else { 391 xen_psm_acquire_irq(irqno); 392 ec_unbind_irq(irqno); 393 } 394 return (err); 395 } 396 397 static int 398 xen_psm_post_cpu_start() 399 { 400 int cpun; 401 402 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 403 cpun = psm_get_cpu_id(); 404 apic_cpus[cpun].aci_status = 405 APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 406 } 407 return (PSM_SUCCESS); 408 } 409 410 static processorid_t 411 xen_psm_get_next_processorid(processorid_t id) 412 { 413 if (id == -1) 414 return (0); 415 416 for (id++; id < NCPU; id++) { 417 switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { 418 case 0: /* yeah, that one's there */ 419 return (id); 420 default: 421 case X_EINVAL: /* out of range */ 422 return (-1); 423 case X_ENOENT: /* not present in the domain */ 424 /* 425 * It's not clear that we -need- to keep looking 426 * at this point, if, e.g., we can guarantee 427 * the hypervisor always keeps a contiguous range 428 * of vcpus around this is equivalent to "out of range". 429 * 430 * But it would be sad to miss a vcpu we're 431 * supposed to be using .. 432 */ 433 break; 434 } 435 } 436 437 return (-1); 438 } 439 440 /* 441 * XXPV - undo the start cpu op change; return to ignoring this value 442 * - also tweak error handling in main startup loop 443 */ 444 /*ARGSUSED*/ 445 static int 446 xen_psm_cpu_start(processorid_t id, caddr_t arg) 447 { 448 int ret; 449 450 ASSERT(id > 0); 451 CPUSET_ADD(xen_psm_cpus_online, id); 452 ec_bind_cpu_ipis(id); 453 (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); 454 if ((ret = xen_vcpu_up(id)) == 0) 455 xen_psm_ncpus++; 456 else 457 ret = EINVAL; 458 return (ret); 459 } 460 461 /* 462 * Allocate an irq for inter cpu signaling 463 */ 464 /*ARGSUSED*/ 465 static int 466 xen_psm_get_ipivect(int ipl, int type) 467 { 468 return (ec_bind_ipi_to_irq(ipl, 0)); 469 } 470 471 /*ARGSUSED*/ 472 static int 473 xen_psm_get_clockirq(int ipl) 474 { 475 if (xen_clock_irq != INVALID_IRQ) 476 return (xen_clock_irq); 477 478 xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); 479 return (xen_clock_irq); 480 } 481 482 /*ARGSUSED*/ 483 static void 484 xen_psm_shutdown(int cmd, int fcn) 485 { 486 XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); 487 488 switch (cmd) { 489 case A_SHUTDOWN: 490 switch (fcn) { 491 case AD_BOOT: 492 case AD_IBOOT: 493 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 494 break; 495 case AD_POWEROFF: 496 /* fall through if domU or if poweroff fails */ 497 if (DOMAIN_IS_INITDOMAIN(xen_info)) 498 if (apic_enable_acpi) 499 (void) acpi_poweroff(); 500 /* FALLTHRU */ 501 case AD_HALT: 502 default: 503 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 504 break; 505 } 506 break; 507 case A_REBOOT: 508 (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); 509 break; 510 default: 511 return; 512 } 513 } 514 515 516 static int 517 xen_psm_translate_irq(dev_info_t *dip, int irqno) 518 { 519 if (dip == NULL) { 520 XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" 521 " dip = NULL\n", irqno)); 522 return (irqno); 523 } 524 return (irqno); 525 } 526 527 /* 528 * xen_psm_intr_enter() acks the event that triggered the interrupt and 529 * returns the new priority level, 530 */ 531 /*ARGSUSED*/ 532 static int 533 xen_psm_intr_enter(int ipl, int *vector) 534 { 535 int newipl; 536 uint_t intno; 537 cpu_t *cpu = CPU; 538 539 intno = (*vector); 540 541 ASSERT(intno < NR_IRQS); 542 ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 543 544 ec_clear_irq(intno); 545 546 newipl = autovect[intno].avh_hi_pri; 547 if (newipl == 0) { 548 /* 549 * (newipl == 0) means we have no service routines for this 550 * vector. We will treat this as a spurious interrupt. 551 * We have cleared the pending bit already, clear the event 552 * mask and return a spurious interrupt. This case can happen 553 * when an interrupt delivery is racing with the removal of 554 * of the service routine for that interrupt. 555 */ 556 ec_unmask_irq(intno); 557 newipl = -1; /* flag spurious interrupt */ 558 } else if (newipl <= cpu->cpu_pri) { 559 /* 560 * (newipl <= cpu->cpu_pri) means that we must be trying to 561 * service a vector that was shared with a higher priority 562 * isr. The higher priority handler has been removed and 563 * we need to service this int. We can't return a lower 564 * priority than current cpu priority. Just synthesize a 565 * priority to return that should be acceptable. 566 */ 567 newipl = cpu->cpu_pri + 1; /* synthetic priority */ 568 } 569 return (newipl); 570 } 571 572 573 /* 574 * xen_psm_intr_exit() restores the old interrupt 575 * priority level after processing an interrupt. 576 * It is called with interrupts disabled, and does not enable interrupts. 577 */ 578 /* ARGSUSED */ 579 static void 580 xen_psm_intr_exit(int ipl, int vector) 581 { 582 ec_try_unmask_irq(vector); 583 xen_psm_setspl(ipl); 584 } 585 586 intr_exit_fn_t 587 psm_intr_exit_fn(void) 588 { 589 return (xen_psm_intr_exit); 590 } 591 592 /* 593 * Check if new ipl level allows delivery of previously unserviced events 594 */ 595 static void 596 xen_psm_setspl(int ipl) 597 { 598 struct cpu *cpu = CPU; 599 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 600 uint16_t pending; 601 602 ASSERT(vci->evtchn_upcall_mask != 0); 603 604 /* 605 * If new ipl level will enable any pending interrupts, setup so the 606 * upcoming sti will cause us to get an upcall. 607 */ 608 pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); 609 if (pending) { 610 int i; 611 ulong_t pending_sels = 0; 612 volatile ulong_t *selp; 613 struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 614 615 for (i = bsrw_insn(pending); i > ipl; i--) 616 pending_sels |= cpe->pending_sel[i]; 617 ASSERT(pending_sels); 618 selp = (volatile ulong_t *)&vci->evtchn_pending_sel; 619 atomic_or_ulong(selp, pending_sels); 620 vci->evtchn_upcall_pending = 1; 621 } 622 } 623 624 /* 625 * This function provides external interface to the nexus for all 626 * functionality related to the new DDI interrupt framework. 627 * 628 * Input: 629 * dip - pointer to the dev_info structure of the requested device 630 * hdlp - pointer to the internal interrupt handle structure for the 631 * requested interrupt 632 * intr_op - opcode for this call 633 * result - pointer to the integer that will hold the result to be 634 * passed back if return value is PSM_SUCCESS 635 * 636 * Output: 637 * return value is either PSM_SUCCESS or PSM_FAILURE 638 */ 639 int 640 xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, 641 psm_intr_op_t intr_op, int *result) 642 { 643 int cap; 644 int err; 645 int new_priority; 646 apic_irq_t *irqp; 647 struct intrspec *ispec; 648 649 DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " 650 "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); 651 652 switch (intr_op) { 653 case PSM_INTR_OP_CHECK_MSI: 654 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 655 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 656 DDI_INTR_TYPE_MSIX); 657 break; 658 } 659 /* 660 * Check MSI/X is supported or not at APIC level and 661 * masked off the MSI/X bits in hdlp->ih_type if not 662 * supported before return. If MSI/X is supported, 663 * leave the ih_type unchanged and return. 664 * 665 * hdlp->ih_type passed in from the nexus has all the 666 * interrupt types supported by the device. 667 */ 668 if (xen_support_msi == 0) { 669 /* 670 * if xen_support_msi is not set, call 671 * apic_check_msi_support() to check whether msi 672 * is supported first 673 */ 674 if (apic_check_msi_support() == PSM_SUCCESS) 675 xen_support_msi = 1; 676 else 677 xen_support_msi = -1; 678 } 679 if (xen_support_msi == 1) 680 *result = hdlp->ih_type; 681 else 682 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | 683 DDI_INTR_TYPE_MSIX); 684 break; 685 case PSM_INTR_OP_ALLOC_VECTORS: 686 *result = apic_alloc_vectors(dip, hdlp->ih_inum, 687 hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type, 688 (int)(uintptr_t)hdlp->ih_scratch2); 689 break; 690 case PSM_INTR_OP_FREE_VECTORS: 691 apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, 692 hdlp->ih_pri, hdlp->ih_type); 693 break; 694 case PSM_INTR_OP_NAVAIL_VECTORS: 695 /* 696 * XXPV - maybe we should make this be: 697 * min(APIC_VECTOR_PER_IPL, count of all avail vectors); 698 */ 699 if (DOMAIN_IS_INITDOMAIN(xen_info)) 700 *result = APIC_VECTOR_PER_IPL; 701 else 702 *result = 1; 703 break; 704 case PSM_INTR_OP_XLATE_VECTOR: 705 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; 706 if (ispec->intrspec_vec >= PIRQ_BASE && 707 ispec->intrspec_vec < NR_PIRQS && 708 DOMAIN_IS_INITDOMAIN(xen_info)) { 709 *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); 710 } else { 711 *result = ispec->intrspec_vec; 712 } 713 break; 714 case PSM_INTR_OP_GET_PENDING: 715 /* XXPV - is this enough for dom0 or do we need to ref ioapic */ 716 *result = ec_pending_irq(hdlp->ih_vector); 717 break; 718 case PSM_INTR_OP_CLEAR_MASK: 719 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 720 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 721 return (PSM_FAILURE); 722 ec_enable_irq(hdlp->ih_vector); 723 break; 724 case PSM_INTR_OP_SET_MASK: 725 /* XXPV - is this enough for dom0 or do we need to set ioapic */ 726 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 727 return (PSM_FAILURE); 728 ec_disable_irq(hdlp->ih_vector); 729 break; 730 case PSM_INTR_OP_GET_CAP: 731 cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; 732 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) 733 cap |= DDI_INTR_FLAG_MASKABLE; 734 *result = cap; 735 break; 736 case PSM_INTR_OP_GET_SHARED: 737 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 738 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) 739 return (PSM_FAILURE); 740 if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) 741 == NULL) 742 return (PSM_FAILURE); 743 *result = irqp->airq_share ? 1: 0; 744 } else { 745 return (PSM_FAILURE); 746 } 747 break; 748 case PSM_INTR_OP_SET_PRI: 749 new_priority = *(int *)result; 750 err = ec_set_irq_priority(hdlp->ih_vector, new_priority); 751 if (err != 0) 752 return (PSM_FAILURE); 753 break; 754 case PSM_INTR_OP_GET_INTR: 755 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 756 return (PSM_FAILURE); 757 /* 758 * The interrupt handle given here has been allocated 759 * specifically for this command, and ih_private carries 760 * a pointer to a apic_get_intr_t. 761 */ 762 if (apic_get_vector_intr_info( 763 hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) 764 return (PSM_FAILURE); 765 break; 766 case PSM_INTR_OP_SET_CAP: 767 /* FALLTHRU */ 768 default: 769 return (PSM_FAILURE); 770 } 771 return (PSM_SUCCESS); 772 } 773 774 static void 775 xen_psm_rebind_irq(int irq) 776 { 777 cpuset_t ncpu; 778 processorid_t newcpu; 779 780 newcpu = xen_psm_bind_intr(irq); 781 if (newcpu == IRQ_UNBOUND) { 782 CPUSET_ZERO(ncpu); 783 CPUSET_OR(ncpu, xen_psm_cpus_online); 784 } else { 785 CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); 786 } 787 ec_set_irq_affinity(irq, ncpu); 788 } 789 790 /* 791 * Disable all device interrupts for the given cpu. 792 * High priority interrupts are not disabled and will still be serviced. 793 */ 794 static int 795 xen_psm_disable_intr(processorid_t cpun) 796 { 797 int irq; 798 799 /* 800 * Can't offline VCPU 0 on this hypervisor. There's no reason 801 * anyone would want to given that the CPUs are virtual. Also note 802 * that the hypervisor requires suspend/resume to be on VCPU 0. 803 */ 804 if (cpun == 0) 805 return (PSM_FAILURE); 806 807 CPUSET_DEL(xen_psm_cpus_online, cpun); 808 for (irq = 0; irq < NR_IRQS; irq++) { 809 if (!ec_irq_needs_rebind(irq, cpun)) 810 continue; 811 xen_psm_rebind_irq(irq); 812 } 813 return (PSM_SUCCESS); 814 } 815 816 static void 817 xen_psm_enable_intr(processorid_t cpun) 818 { 819 int irq; 820 821 if (cpun == 0) 822 return; 823 824 CPUSET_ADD(xen_psm_cpus_online, cpun); 825 826 /* 827 * Rebalance device interrupts among online processors 828 */ 829 for (irq = 0; irq < NR_IRQS; irq++) { 830 if (!ec_irq_rebindable(irq)) 831 continue; 832 xen_psm_rebind_irq(irq); 833 } 834 } 835 836 /* 837 * This function will reprogram the timer. 838 * 839 * When in oneshot mode the argument is the absolute time in future at which to 840 * generate the interrupt. 841 * 842 * When in periodic mode, the argument is the interval at which the 843 * interrupts should be generated. There is no need to support the periodic 844 * mode timer change at this time. 845 * 846 * Note that we must be careful to convert from hrtime to Xen system time (see 847 * xpv_timestamp.c). 848 */ 849 static void 850 xen_psm_timer_reprogram(hrtime_t timer_req) 851 { 852 hrtime_t now, timer_new, time_delta, xen_time; 853 ulong_t flags; 854 855 flags = intr_clear(); 856 /* 857 * We should be called from high PIL context (CBE_HIGH_PIL), 858 * so kpreempt is disabled. 859 */ 860 861 now = xpv_gethrtime(); 862 xen_time = xpv_getsystime(); 863 if (timer_req <= now) { 864 /* 865 * requested to generate an interrupt in the past 866 * generate an interrupt as soon as possible 867 */ 868 time_delta = XEN_NSEC_PER_TICK; 869 } else 870 time_delta = timer_req - now; 871 872 timer_new = xen_time + time_delta; 873 if (HYPERVISOR_set_timer_op(timer_new) != 0) 874 panic("can't set hypervisor timer?"); 875 intr_restore(flags); 876 } 877 878 /* 879 * This function will enable timer interrupts. 880 */ 881 static void 882 xen_psm_timer_enable(void) 883 { 884 ec_unmask_irq(xen_clock_irq); 885 } 886 887 /* 888 * This function will disable timer interrupts on the current cpu. 889 */ 890 static void 891 xen_psm_timer_disable(void) 892 { 893 (void) ec_block_irq(xen_clock_irq); 894 /* 895 * If the clock irq is pending on this cpu then we need to 896 * clear the pending interrupt. 897 */ 898 ec_unpend_irq(xen_clock_irq); 899 } 900 901 /* 902 * 903 * The following functions are in the platform specific file so that they 904 * can be different functions depending on whether we are running on 905 * bare metal or a hypervisor. 906 */ 907 908 /* 909 * Allocate a free vector for irq at ipl. 910 */ 911 /* ARGSUSED */ 912 uchar_t 913 apic_allocate_vector(int ipl, int irq, int pri) 914 { 915 physdev_irq_t irq_op; 916 uchar_t vector; 917 918 irq_op.irq = irq; 919 920 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) 921 panic("Hypervisor alloc vector failed"); 922 vector = irq_op.vector; 923 /* 924 * No need to worry about vector colliding with our reserved vectors 925 * e.g. T_FASTTRAP, xen can differentiate between hardware and software 926 * generated traps and handle them properly. 927 */ 928 apic_vector_to_irq[vector] = (uchar_t)irq; 929 return (vector); 930 } 931 932 /* Mark vector as not being used by any irq */ 933 void 934 apic_free_vector(uchar_t vector) 935 { 936 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 937 } 938 939 /* 940 * This function allocate "count" vector(s) for the given "dip/pri/type" 941 */ 942 static int 943 apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, 944 int behavior) 945 { 946 int rcount, i; 947 uchar_t vector, cpu; 948 int irqno; 949 major_t major; 950 apic_irq_t *irqptr; 951 952 /* only supports MSI at the moment, will add MSI-X support later */ 953 if (type != DDI_INTR_TYPE_MSI) 954 return (0); 955 956 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d " 957 "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", 958 (void *)dip, type, inum, pri, count, behavior)); 959 960 if (count > 1) { 961 if (behavior == DDI_INTR_ALLOC_STRICT && 962 (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) 963 return (0); 964 965 if (apic_multi_msi_enable == 0) 966 count = 1; 967 else if (count > apic_multi_msi_max) 968 count = apic_multi_msi_max; 969 } 970 971 /* 972 * XXPV - metal version takes all vectors avail at given pri. 973 * Why do that? For now just allocate count vectors. 974 */ 975 rcount = count; 976 977 mutex_enter(&airq_mutex); 978 979 /* 980 * XXPV - currently the hypervisor does not support MSI at all. 981 * It doesn't return consecutive vectors. This code is a first 982 * cut for the (future) time that MSI is supported. 983 */ 984 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 985 for (i = 0; i < rcount; i++) { 986 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == 987 INVALID_IRQ) { 988 mutex_exit(&airq_mutex); 989 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 990 "apic_allocate_irq failed\n")); 991 return (i); 992 } 993 apic_max_device_irq = max(irqno, apic_max_device_irq); 994 apic_min_device_irq = min(irqno, apic_min_device_irq); 995 irqptr = apic_irq_table[irqno]; 996 vector = apic_allocate_vector(pri, irqno, 0); 997 apic_vector_to_irq[vector] = (uchar_t)irqno; 998 #ifdef DEBUG 999 if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) 1000 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " 1001 "apic_vector_to_irq is not APIC_RESV_IRQ\n")); 1002 #endif 1003 1004 irqptr->airq_vector = vector; 1005 irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ 1006 irqptr->airq_intin_no = (uchar_t)rcount; 1007 irqptr->airq_ipl = pri; 1008 irqptr->airq_origirq = (uchar_t)(inum + i); 1009 irqptr->airq_share_id = 0; 1010 irqptr->airq_mps_intr_index = MSI_INDEX; 1011 irqptr->airq_dip = dip; 1012 irqptr->airq_major = major; 1013 if (i == 0) /* they all bound to the same cpu */ 1014 cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno, 1015 0xff, 0xff); 1016 else 1017 irqptr->airq_cpu = cpu; 1018 DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x " 1019 "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, 1020 (void *)irqptr->airq_dip, irqptr->airq_vector, 1021 irqptr->airq_origirq, pri)); 1022 } 1023 mutex_exit(&airq_mutex); 1024 return (rcount); 1025 } 1026 1027 /* 1028 * The hypervisor doesn't permit access to local apics directly 1029 */ 1030 /* ARGSUSED */ 1031 uint32_t * 1032 mapin_apic(uint32_t addr, size_t len, int flags) 1033 { 1034 /* 1035 * Return a pointer to a memory area to fake out the 1036 * probe code that wants to read apic registers. 1037 * The dummy values will end up being ignored by xen 1038 * later on when they are used anyway. 1039 */ 1040 xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; 1041 return (xen_psm_dummy_apic); 1042 } 1043 1044 /* ARGSUSED */ 1045 uint32_t * 1046 mapin_ioapic(uint32_t addr, size_t len, int flags) 1047 { 1048 /* 1049 * Return non-null here to fake out configure code that calls this. 1050 * The i86xpv platform will not reference through the returned value.. 1051 */ 1052 return ((uint32_t *)0x1); 1053 } 1054 1055 /* ARGSUSED */ 1056 void 1057 mapout_apic(caddr_t addr, size_t len) 1058 { 1059 } 1060 1061 /* ARGSUSED */ 1062 void 1063 mapout_ioapic(caddr_t addr, size_t len) 1064 { 1065 } 1066 1067 uint32_t 1068 ioapic_read(int apic_ix, uint32_t reg) 1069 { 1070 physdev_apic_t apic; 1071 1072 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1073 apic.reg = reg; 1074 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) 1075 panic("read ioapic %d reg %d failed", apic_ix, reg); 1076 return (apic.value); 1077 } 1078 1079 void 1080 ioapic_write(int apic_ix, uint32_t reg, uint32_t value) 1081 { 1082 physdev_apic_t apic; 1083 1084 apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; 1085 apic.reg = reg; 1086 apic.value = value; 1087 if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) 1088 panic("write ioapic %d reg %d failed", apic_ix, reg); 1089 } 1090 1091 /* 1092 * Call rebind to do the actual programming. 1093 */ 1094 int 1095 apic_setup_io_intr(void *p, int irq, boolean_t deferred) 1096 { 1097 apic_irq_t *irqptr; 1098 struct ioapic_reprogram_data *drep = NULL; 1099 int rv, cpu; 1100 cpuset_t cpus; 1101 1102 /* 1103 * Set cpu based on xen idea of online cpu's not apic tables. 1104 * Note that xen ignores/sets to it's own preferred value the 1105 * target cpu field when programming ioapic anyway. 1106 */ 1107 if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) { 1108 CPUSET_ZERO(cpus); 1109 CPUSET_OR(cpus, xen_psm_cpus_online); 1110 } else { 1111 CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); 1112 } 1113 apic_irq_table[irq]->airq_cpu = cpu; 1114 if (deferred) { 1115 drep = (struct ioapic_reprogram_data *)p; 1116 ASSERT(drep != NULL); 1117 irqptr = drep->irqp; 1118 } else { 1119 irqptr = (apic_irq_t *)p; 1120 } 1121 ASSERT(irqptr != NULL); 1122 rv = apic_rebind(irqptr, cpu, drep); 1123 if (rv) { 1124 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 1125 cpu = 0; 1126 rv = apic_rebind(irqptr, cpu, drep); 1127 } 1128 /* 1129 * If rebind successful bind the irq to an event channel 1130 */ 1131 if (rv == 0) 1132 ec_setup_pirq(irq, irqptr->airq_ipl, cpus); 1133 return (rv); 1134 } 1135 1136 /* 1137 * Allocate a new vector for the given irq 1138 */ 1139 /* ARGSUSED */ 1140 uchar_t 1141 apic_modify_vector(uchar_t vector, int irq) 1142 { 1143 return (apic_allocate_vector(0, irq, 0)); 1144 } 1145 1146 /* 1147 * The rest of the file is just generic psm module boilerplate 1148 */ 1149 1150 static struct psm_ops xen_psm_ops = { 1151 xen_psm_probe, /* psm_probe */ 1152 1153 xen_psm_softinit, /* psm_init */ 1154 xen_psm_picinit, /* psm_picinit */ 1155 xen_psm_intr_enter, /* psm_intr_enter */ 1156 xen_psm_intr_exit, /* psm_intr_exit */ 1157 xen_psm_setspl, /* psm_setspl */ 1158 xen_psm_addspl, /* psm_addspl */ 1159 xen_psm_delspl, /* psm_delspl */ 1160 xen_psm_disable_intr, /* psm_disable_intr */ 1161 xen_psm_enable_intr, /* psm_enable_intr */ 1162 (int (*)(int))NULL, /* psm_softlvl_to_irq */ 1163 (void (*)(int))NULL, /* psm_set_softintr */ 1164 (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ 1165 (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ 1166 1167 xen_psm_clkinit, /* psm_clkinit */ 1168 xen_psm_get_clockirq, /* psm_get_clockirq */ 1169 xen_psm_hrtimeinit, /* psm_hrtimeinit */ 1170 xpv_gethrtime, /* psm_gethrtime */ 1171 1172 xen_psm_get_next_processorid, /* psm_get_next_processorid */ 1173 xen_psm_cpu_start, /* psm_cpu_start */ 1174 xen_psm_post_cpu_start, /* psm_post_cpu_start */ 1175 xen_psm_shutdown, /* psm_shutdown */ 1176 xen_psm_get_ipivect, /* psm_get_ipivect */ 1177 xen_psm_send_ipi, /* psm_send_ipi */ 1178 1179 xen_psm_translate_irq, /* psm_translate_irq */ 1180 1181 (void (*)(int, char *))NULL, /* psm_notify_error */ 1182 (void (*)(int msg))NULL, /* psm_notify_func */ 1183 xen_psm_timer_reprogram, /* psm_timer_reprogram */ 1184 xen_psm_timer_enable, /* psm_timer_enable */ 1185 xen_psm_timer_disable, /* psm_timer_disable */ 1186 (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ 1187 (void (*)(int, int))NULL, /* psm_preshutdown */ 1188 xen_intr_ops /* Advanced DDI Interrupt framework */ 1189 }; 1190 1191 static struct psm_info xen_psm_info = { 1192 PSM_INFO_VER01_5, /* version */ 1193 PSM_OWN_SYS_DEFAULT, /* ownership */ 1194 &xen_psm_ops, /* operation */ 1195 "xen_psm", /* machine name */ 1196 "platform module %I%" /* machine descriptions */ 1197 }; 1198 1199 static void *xen_psm_hdlp; 1200 1201 int 1202 _init(void) 1203 { 1204 return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); 1205 } 1206 1207 int 1208 _fini(void) 1209 { 1210 return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); 1211 } 1212 1213 int 1214 _info(struct modinfo *modinfop) 1215 { 1216 return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); 1217 } 1218