/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #define PSMI_1_5 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Global Data */ int xen_psm_verbose = 0; volatile uint32_t *apicadr = NULL; /* dummy, so common code will link */ int apic_error = 0; int apic_verbose = 0; cpuset_t apic_cpumask; int apic_forceload = 0; uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15 }; uchar_t apic_ipltopri[MAXIPL + 1]; uchar_t apic_ipls[APIC_AVAIL_VECTOR]; uint_t apic_picinit_called; apic_cpus_info_t *apic_cpus; int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; /* use to make sure only one cpu handles the nmi */ static lock_t xen_psm_nmi_lock; int xen_psm_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ int xen_psm_panic_on_nmi = 0; int xen_psm_num_nmis = 0; cpuset_t xen_psm_cpus_online; /* online cpus */ int xen_psm_ncpus = 1; /* cpu count */ int xen_psm_next_bind_cpu; /* next cpu to bind an interrupt to */ /* * XXPV we flag MSI as not supported, since the hypervisor currently doesn't * support MSI at all. Change this initialization to zero when MSI is * supported. */ int xen_support_msi = -1; static int xen_clock_irq = INVALID_IRQ; /* flag definitions for xen_psm_verbose */ #define XEN_PSM_VERBOSE_IRQ_FLAG 0x00000001 #define XEN_PSM_VERBOSE_POWEROFF_FLAG 0x00000002 #define XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000004 #define XEN_PSM_VERBOSE_IRQ(fmt) \ if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG) \ cmn_err fmt; #define XEN_PSM_VERBOSE_POWEROFF(fmt) \ if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG) \ prom_printf fmt; /* * Dummy apic array to point common routines at that want to do some apic * manipulation. Xen doesn't allow guest apic access so we point at these * memory locations to fake out those who want to do apic fiddling. */ uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1]; static struct psm_info xen_psm_info; static void xen_psm_setspl(int); static int apic_alloc_vectors(dev_info_t *, int, int, int, int, int); /* * Local support routines */ /* * Select vcpu to bind xen virtual device interrupt to. */ /*ARGSUSED*/ int xen_psm_bind_intr(int irq) { int bind_cpu, test_cpu; apic_irq_t *irqptr; if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY) return (IRQ_UNBOUND); if (irq <= APIC_MAX_VECTOR) irqptr = apic_irq_table[irq]; else irqptr = NULL; if (irqptr && (irqptr->airq_cpu & IRQ_USER_BOUND)) { bind_cpu = irqptr->airq_cpu; test_cpu = bind_cpu & ~IRQ_USER_BOUND; if (!CPU_IN_SET(xen_psm_cpus_online, test_cpu)) bind_cpu = 0; goto done; } if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { do { bind_cpu = xen_psm_next_bind_cpu++; if (xen_psm_next_bind_cpu >= xen_psm_ncpus) xen_psm_next_bind_cpu = 0; } while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu)); } else { bind_cpu = 0; } done: return (bind_cpu); } /* * Autoconfiguration Routines */ static int xen_psm_probe(void) { int ret = PSM_SUCCESS; if (DOMAIN_IS_INITDOMAIN(xen_info)) ret = apic_probe_common(xen_psm_info.p_mach_idstring); return (ret); } static void xen_psm_softinit(void) { /* LINTED logical expression always true: op "||" */ ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t)); CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0); if (DOMAIN_IS_INITDOMAIN(xen_info)) { apic_init_common(); } } #define XEN_NSEC_PER_TICK 10 /* XXX - assume we have a 100 Mhz clock */ /*ARGSUSED*/ static int xen_psm_clkinit(int hertz) { extern enum tod_fault_type tod_fault(enum tod_fault_type, int); extern int dosynctodr; /* * domU cannot set the TOD hardware, fault the TOD clock now to * indicate that and turn off attempts to sync TOD hardware * with the hires timer. */ if (!DOMAIN_IS_INITDOMAIN(xen_info)) { mutex_enter(&tod_lock); (void) tod_fault(TOD_RDONLY, 0); dosynctodr = 0; mutex_exit(&tod_lock); } /* * The hypervisor provides a timer based on the local APIC timer. * The interface supports requests of nanosecond resolution. * A common frequency of the apic clock is 100 Mhz which * gives a resolution of 10 nsec per tick. What we would really like * is a way to get the ns per tick value from xen. * XXPV - This is an assumption that needs checking and may change */ return (XEN_NSEC_PER_TICK); } static void xen_psm_hrtimeinit(void) { extern int gethrtime_hires; gethrtime_hires = 1; } /* xen_psm NMI handler */ /*ARGSUSED*/ static void xen_psm_nmi_intr(caddr_t arg, struct regs *rp) { xen_psm_num_nmis++; if (!lock_try(&xen_psm_nmi_lock)) return; if (xen_psm_kmdb_on_nmi && psm_debugger()) { debug_enter("NMI received: entering kmdb\n"); } else if (xen_psm_panic_on_nmi) { /* Keep panic from entering kmdb. */ nopanicdebug = 1; panic("NMI received\n"); } else { /* * prom_printf is the best shot we have of something which is * problem free from high level/NMI type of interrupts */ prom_printf("NMI received\n"); } lock_clear(&xen_psm_nmi_lock); } static void xen_psm_picinit() { int cpu, irqno; cpuset_t cpus; if (DOMAIN_IS_INITDOMAIN(xen_info)) { /* set a flag so we know we have run xen_psm_picinit() */ apic_picinit_called = 1; LOCK_INIT_CLEAR(&apic_ioapic_lock); /* XXPV - do we need to do this? */ picsetup(); /* initialise the 8259 */ /* enable apic mode if imcr present */ /* XXPV - do we need to do this either? */ if (apic_imcrp) { outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); } ioapic_init_intr(IOAPIC_NOMASK); /* * We never called xen_psm_addspl() when the SCI * interrupt was added because that happened before the * PSM module was loaded. Fix that up here by doing * any missed operations (e.g. bind to CPU) */ if ((irqno = apic_sci_vect) > 0) { if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { CPUSET_ZERO(cpus); CPUSET_OR(cpus, xen_psm_cpus_online); } else { CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); } ec_set_irq_affinity(irqno, cpus); apic_irq_table[irqno]->airq_temp_cpu = (uchar_t)(cpu & ~IRQ_USER_BOUND); ec_enable_irq(irqno); } } /* add nmi handler - least priority nmi handler */ LOCK_INIT_CLEAR(&xen_psm_nmi_lock); if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr, "xVM_psm NMI handler", (caddr_t)NULL)) cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler"); } /* * generates an interprocessor interrupt to another CPU */ static void xen_psm_send_ipi(int cpun, int ipl) { ulong_t flag = intr_clear(); ec_send_ipi(ipl, cpun); intr_restore(flag); } /*ARGSUSED*/ static int xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl) { int cpu, ret; cpuset_t cpus; /* * We are called at splhi() so we can't call anything that might end * up trying to context switch. */ if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && DOMAIN_IS_INITDOMAIN(xen_info)) { /* * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq() */ ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl); } else { /* * Set priority/affinity/enable for non PIRQs */ ret = ec_set_irq_priority(irqno, ipl); ASSERT(ret == 0); if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) { CPUSET_ZERO(cpus); CPUSET_OR(cpus, xen_psm_cpus_online); } else { CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); } ec_set_irq_affinity(irqno, cpus); ec_enable_irq(irqno); } return (ret); } /* * Acquire ownership of this irq on this cpu */ void xen_psm_acquire_irq(int irq) { ulong_t flags; int cpuid; /* * If the irq is currently being serviced by another cpu * we busy-wait for the other cpu to finish. Take any * pending interrupts before retrying. */ do { flags = intr_clear(); cpuid = ec_block_irq(irq); intr_restore(flags); } while (cpuid != CPU->cpu_id); } /*ARGSUSED*/ static int xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl) { apic_irq_t *irqptr; int err = PSM_SUCCESS; if (irqno >= PIRQ_BASE && irqno < NR_PIRQS && DOMAIN_IS_INITDOMAIN(xen_info)) { irqptr = apic_irq_table[irqno]; /* * unbind if no more sharers of this irq/evtchn */ if (irqptr->airq_share == 1) { xen_psm_acquire_irq(irqno); ec_unbind_irq(irqno); } err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl); /* * If still in use reset priority */ if (!err && irqptr->airq_share != 0) { err = ec_set_irq_priority(irqno, max_ipl); return (err); } } else { xen_psm_acquire_irq(irqno); ec_unbind_irq(irqno); } return (err); } static processorid_t xen_psm_get_next_processorid(processorid_t id) { if (id == -1) return (0); for (id++; id < NCPU; id++) { switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) { case 0: /* yeah, that one's there */ return (id); default: case X_EINVAL: /* out of range */ return (-1); case X_ENOENT: /* not present in the domain */ /* * It's not clear that we -need- to keep looking * at this point, if, e.g., we can guarantee * the hypervisor always keeps a contiguous range * of vcpus around this is equivalent to "out of range". * * But it would be sad to miss a vcpu we're * supposed to be using .. */ break; } } return (-1); } /* * XXPV - undo the start cpu op change; return to ignoring this value * - also tweak error handling in main startup loop */ /*ARGSUSED*/ static int xen_psm_cpu_start(processorid_t id, caddr_t arg) { int ret; ASSERT(id > 0); CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id); ec_bind_cpu_ipis(id); (void) ec_bind_virq_to_irq(VIRQ_TIMER, id); if ((ret = xen_vcpu_up(id)) == 0) xen_psm_ncpus++; else ret = EINVAL; return (ret); } /* * Allocate an irq for inter cpu signaling */ /*ARGSUSED*/ static int xen_psm_get_ipivect(int ipl, int type) { return (ec_bind_ipi_to_irq(ipl, 0)); } /*ARGSUSED*/ static int xen_psm_get_clockirq(int ipl) { if (xen_clock_irq != INVALID_IRQ) return (xen_clock_irq); xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0); return (xen_clock_irq); } /*ARGSUSED*/ static void xen_psm_shutdown(int cmd, int fcn) { XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn)); switch (cmd) { case A_SHUTDOWN: switch (fcn) { case AD_BOOT: case AD_IBOOT: (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); break; case AD_POWEROFF: /* fall through if domU or if poweroff fails */ if (DOMAIN_IS_INITDOMAIN(xen_info)) if (apic_enable_acpi) (void) acpi_poweroff(); /* FALLTHRU */ case AD_HALT: default: (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); break; } break; case A_REBOOT: (void) HYPERVISOR_shutdown(SHUTDOWN_reboot); break; default: return; } } static int xen_psm_translate_irq(dev_info_t *dip, int irqno) { if (dip == NULL) { XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d" " dip = NULL\n", irqno)); return (irqno); } return (irqno); } /* * xen_psm_intr_enter() acks the event that triggered the interrupt and * returns the new priority level, */ /*ARGSUSED*/ static int xen_psm_intr_enter(int ipl, int *vector) { int newipl; uint_t intno; cpu_t *cpu = CPU; intno = (*vector); ASSERT(intno < NR_IRQS); ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); ec_clear_irq(intno); newipl = autovect[intno].avh_hi_pri; if (newipl == 0) { /* * (newipl == 0) means we have no service routines for this * vector. We will treat this as a spurious interrupt. * We have cleared the pending bit already, clear the event * mask and return a spurious interrupt. This case can happen * when an interrupt delivery is racing with the removal of * of the service routine for that interrupt. */ ec_unmask_irq(intno); newipl = -1; /* flag spurious interrupt */ } else if (newipl <= cpu->cpu_pri) { /* * (newipl <= cpu->cpu_pri) means that we must be trying to * service a vector that was shared with a higher priority * isr. The higher priority handler has been removed and * we need to service this int. We can't return a lower * priority than current cpu priority. Just synthesize a * priority to return that should be acceptable. */ newipl = cpu->cpu_pri + 1; /* synthetic priority */ } return (newipl); } /* * xen_psm_intr_exit() restores the old interrupt * priority level after processing an interrupt. * It is called with interrupts disabled, and does not enable interrupts. */ /* ARGSUSED */ static void xen_psm_intr_exit(int ipl, int vector) { ec_try_unmask_irq(vector); xen_psm_setspl(ipl); } intr_exit_fn_t psm_intr_exit_fn(void) { return (xen_psm_intr_exit); } /* * Check if new ipl level allows delivery of previously unserviced events */ static void xen_psm_setspl(int ipl) { struct cpu *cpu = CPU; volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; uint16_t pending; ASSERT(vci->evtchn_upcall_mask != 0); /* * If new ipl level will enable any pending interrupts, setup so the * upcoming sti will cause us to get an upcall. */ pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1); if (pending) { int i; ulong_t pending_sels = 0; volatile ulong_t *selp; struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; for (i = bsrw_insn(pending); i > ipl; i--) pending_sels |= cpe->pending_sel[i]; ASSERT(pending_sels); selp = (volatile ulong_t *)&vci->evtchn_pending_sel; atomic_or_ulong(selp, pending_sels); vci->evtchn_upcall_pending = 1; } } /* * This function provides external interface to the nexus for all * functionality related to the new DDI interrupt framework. * * Input: * dip - pointer to the dev_info structure of the requested device * hdlp - pointer to the internal interrupt handle structure for the * requested interrupt * intr_op - opcode for this call * result - pointer to the integer that will hold the result to be * passed back if return value is PSM_SUCCESS * * Output: * return value is either PSM_SUCCESS or PSM_FAILURE */ int xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, psm_intr_op_t intr_op, int *result) { int cap; int err; int new_priority; apic_irq_t *irqp; struct intrspec *ispec; DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p " "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op)); switch (intr_op) { case PSM_INTR_OP_CHECK_MSI: if (!DOMAIN_IS_INITDOMAIN(xen_info)) { *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX); break; } /* * Check MSI/X is supported or not at APIC level and * masked off the MSI/X bits in hdlp->ih_type if not * supported before return. If MSI/X is supported, * leave the ih_type unchanged and return. * * hdlp->ih_type passed in from the nexus has all the * interrupt types supported by the device. */ if (xen_support_msi == 0) { /* * if xen_support_msi is not set, call * apic_check_msi_support() to check whether msi * is supported first */ if (apic_check_msi_support() == PSM_SUCCESS) xen_support_msi = 1; else xen_support_msi = -1; } if (xen_support_msi == 1) *result = hdlp->ih_type; else *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX); break; case PSM_INTR_OP_ALLOC_VECTORS: *result = apic_alloc_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type, (int)(uintptr_t)hdlp->ih_scratch2); break; case PSM_INTR_OP_FREE_VECTORS: apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type); break; case PSM_INTR_OP_NAVAIL_VECTORS: /* * XXPV - maybe we should make this be: * min(APIC_VECTOR_PER_IPL, count of all avail vectors); */ if (DOMAIN_IS_INITDOMAIN(xen_info)) *result = APIC_VECTOR_PER_IPL; else *result = 1; break; case PSM_INTR_OP_XLATE_VECTOR: ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp; if (ispec->intrspec_vec >= PIRQ_BASE && ispec->intrspec_vec < NR_PIRQS && DOMAIN_IS_INITDOMAIN(xen_info)) { *result = apic_introp_xlate(dip, ispec, hdlp->ih_type); } else { *result = ispec->intrspec_vec; } break; case PSM_INTR_OP_GET_PENDING: /* XXPV - is this enough for dom0 or do we need to ref ioapic */ *result = ec_pending_irq(hdlp->ih_vector); break; case PSM_INTR_OP_CLEAR_MASK: /* XXPV - is this enough for dom0 or do we need to set ioapic */ if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) return (PSM_FAILURE); ec_enable_irq(hdlp->ih_vector); break; case PSM_INTR_OP_SET_MASK: /* XXPV - is this enough for dom0 or do we need to set ioapic */ if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) return (PSM_FAILURE); ec_disable_irq(hdlp->ih_vector); break; case PSM_INTR_OP_GET_CAP: cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE; if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) cap |= DDI_INTR_FLAG_MASKABLE; *result = cap; break; case PSM_INTR_OP_GET_SHARED: if (DOMAIN_IS_INITDOMAIN(xen_info)) { if (hdlp->ih_type != DDI_INTR_TYPE_FIXED) return (PSM_FAILURE); if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL) return (PSM_FAILURE); *result = irqp->airq_share ? 1: 0; } else { return (PSM_FAILURE); } break; case PSM_INTR_OP_SET_PRI: new_priority = *(int *)result; err = ec_set_irq_priority(hdlp->ih_vector, new_priority); if (err != 0) return (PSM_FAILURE); break; case PSM_INTR_OP_GET_INTR: if (!DOMAIN_IS_INITDOMAIN(xen_info)) return (PSM_FAILURE); /* * The interrupt handle given here has been allocated * specifically for this command, and ih_private carries * a pointer to a apic_get_intr_t. */ if (apic_get_vector_intr_info( hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS) return (PSM_FAILURE); break; case PSM_INTR_OP_SET_CAP: /* FALLTHRU */ default: return (PSM_FAILURE); } return (PSM_SUCCESS); } static void xen_psm_rebind_irq(int irq) { cpuset_t ncpu; processorid_t newcpu; apic_irq_t *irqptr; newcpu = xen_psm_bind_intr(irq); if (newcpu == IRQ_UNBOUND) { CPUSET_ZERO(ncpu); CPUSET_OR(ncpu, xen_psm_cpus_online); } else { CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND); } ec_set_irq_affinity(irq, ncpu); if (irq <= APIC_MAX_VECTOR) { irqptr = apic_irq_table[irq]; ASSERT(irqptr != NULL); irqptr->airq_temp_cpu = (uchar_t)newcpu; } } /* * Disable all device interrupts for the given cpu. * High priority interrupts are not disabled and will still be serviced. */ static int xen_psm_disable_intr(processorid_t cpun) { int irq; /* * Can't offline VCPU 0 on this hypervisor. There's no reason * anyone would want to given that the CPUs are virtual. Also note * that the hypervisor requires suspend/resume to be on VCPU 0. */ if (cpun == 0) return (PSM_FAILURE); CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun); for (irq = 0; irq < NR_IRQS; irq++) { if (!ec_irq_needs_rebind(irq, cpun)) continue; xen_psm_rebind_irq(irq); } return (PSM_SUCCESS); } static void xen_psm_enable_intr(processorid_t cpun) { int irq; if (cpun == 0) return; CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun); /* * Rebalance device interrupts among online processors */ for (irq = 0; irq < NR_IRQS; irq++) { if (!ec_irq_rebindable(irq)) continue; xen_psm_rebind_irq(irq); } if (DOMAIN_IS_INITDOMAIN(xen_info)) { apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; } } static int xen_psm_post_cpu_start() { processorid_t cpun; cpun = psm_get_cpu_id(); if (DOMAIN_IS_INITDOMAIN(xen_info)) { /* * Non-virtualized environments can call psm_post_cpu_start * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set. * xen_psm_post_cpu_start() is only called from boot. */ apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE; } return (PSM_SUCCESS); } /* * This function will reprogram the timer. * * When in oneshot mode the argument is the absolute time in future at which to * generate the interrupt. * * When in periodic mode, the argument is the interval at which the * interrupts should be generated. There is no need to support the periodic * mode timer change at this time. * * Note that we must be careful to convert from hrtime to Xen system time (see * xpv_timestamp.c). */ static void xen_psm_timer_reprogram(hrtime_t timer_req) { hrtime_t now, timer_new, time_delta, xen_time; ulong_t flags; flags = intr_clear(); /* * We should be called from high PIL context (CBE_HIGH_PIL), * so kpreempt is disabled. */ now = xpv_gethrtime(); xen_time = xpv_getsystime(); if (timer_req <= now) { /* * requested to generate an interrupt in the past * generate an interrupt as soon as possible */ time_delta = XEN_NSEC_PER_TICK; } else time_delta = timer_req - now; timer_new = xen_time + time_delta; if (HYPERVISOR_set_timer_op(timer_new) != 0) panic("can't set hypervisor timer?"); intr_restore(flags); } /* * This function will enable timer interrupts. */ static void xen_psm_timer_enable(void) { ec_unmask_irq(xen_clock_irq); } /* * This function will disable timer interrupts on the current cpu. */ static void xen_psm_timer_disable(void) { (void) ec_block_irq(xen_clock_irq); /* * If the clock irq is pending on this cpu then we need to * clear the pending interrupt. */ ec_unpend_irq(xen_clock_irq); } /* * * The following functions are in the platform specific file so that they * can be different functions depending on whether we are running on * bare metal or a hypervisor. */ /* * Allocate a free vector for irq at ipl. */ /* ARGSUSED */ uchar_t apic_allocate_vector(int ipl, int irq, int pri) { physdev_irq_t irq_op; uchar_t vector; irq_op.irq = irq; if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) panic("Hypervisor alloc vector failed"); vector = irq_op.vector; /* * No need to worry about vector colliding with our reserved vectors * e.g. T_FASTTRAP, xen can differentiate between hardware and software * generated traps and handle them properly. */ apic_vector_to_irq[vector] = (uchar_t)irq; return (vector); } /* Mark vector as not being used by any irq */ void apic_free_vector(uchar_t vector) { apic_vector_to_irq[vector] = APIC_RESV_IRQ; } /* * This function allocate "count" vector(s) for the given "dip/pri/type" */ static int apic_alloc_vectors(dev_info_t *dip, int inum, int count, int pri, int type, int behavior) { int rcount, i; uchar_t vector, cpu; int irqno; major_t major; apic_irq_t *irqptr; /* only supports MSI at the moment, will add MSI-X support later */ if (type != DDI_INTR_TYPE_MSI) return (0); DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: dip=0x%p type=%d " "inum=0x%x pri=0x%x count=0x%x behavior=%d\n", (void *)dip, type, inum, pri, count, behavior)); if (count > 1) { if (behavior == DDI_INTR_ALLOC_STRICT && (apic_multi_msi_enable == 0 || count > apic_multi_msi_max)) return (0); if (apic_multi_msi_enable == 0) count = 1; else if (count > apic_multi_msi_max) count = apic_multi_msi_max; } /* * XXPV - metal version takes all vectors avail at given pri. * Why do that? For now just allocate count vectors. */ rcount = count; mutex_enter(&airq_mutex); /* * XXPV - currently the hypervisor does not support MSI at all. * It doesn't return consecutive vectors. This code is a first * cut for the (future) time that MSI is supported. */ major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; for (i = 0; i < rcount; i++) { if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == INVALID_IRQ) { mutex_exit(&airq_mutex); DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " "apic_allocate_irq failed\n")); return (i); } apic_max_device_irq = max(irqno, apic_max_device_irq); apic_min_device_irq = min(irqno, apic_min_device_irq); irqptr = apic_irq_table[irqno]; vector = apic_allocate_vector(pri, irqno, 0); apic_vector_to_irq[vector] = (uchar_t)irqno; #ifdef DEBUG if (apic_vector_to_irq[vector] != APIC_RESV_IRQ) DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: " "apic_vector_to_irq is not APIC_RESV_IRQ\n")); #endif irqptr->airq_vector = vector; irqptr->airq_ioapicindex = (uchar_t)inum; /* start */ irqptr->airq_intin_no = (uchar_t)rcount; irqptr->airq_ipl = pri; irqptr->airq_origirq = (uchar_t)(inum + i); irqptr->airq_share_id = 0; irqptr->airq_mps_intr_index = MSI_INDEX; irqptr->airq_dip = dip; irqptr->airq_major = major; if (i == 0) /* they all bound to the same cpu */ cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff); else irqptr->airq_cpu = cpu; DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_vectors: irq=0x%x " "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno, (void *)irqptr->airq_dip, irqptr->airq_vector, irqptr->airq_origirq, pri)); } mutex_exit(&airq_mutex); return (rcount); } /* * The hypervisor doesn't permit access to local apics directly */ /* ARGSUSED */ uint32_t * mapin_apic(uint32_t addr, size_t len, int flags) { /* * Return a pointer to a memory area to fake out the * probe code that wants to read apic registers. * The dummy values will end up being ignored by xen * later on when they are used anyway. */ xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS; return (xen_psm_dummy_apic); } /* ARGSUSED */ uint32_t * mapin_ioapic(uint32_t addr, size_t len, int flags) { /* * Return non-null here to fake out configure code that calls this. * The i86xpv platform will not reference through the returned value.. */ return ((uint32_t *)0x1); } /* ARGSUSED */ void mapout_apic(caddr_t addr, size_t len) { } /* ARGSUSED */ void mapout_ioapic(caddr_t addr, size_t len) { } uint32_t ioapic_read(int apic_ix, uint32_t reg) { physdev_apic_t apic; apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; apic.reg = reg; if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic)) panic("read ioapic %d reg %d failed", apic_ix, reg); return (apic.value); } void ioapic_write(int apic_ix, uint32_t reg, uint32_t value) { physdev_apic_t apic; apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix]; apic.reg = reg; apic.value = value; if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic)) panic("write ioapic %d reg %d failed", apic_ix, reg); } /* * Call rebind to do the actual programming. */ int apic_setup_io_intr(void *p, int irq, boolean_t deferred) { apic_irq_t *irqptr; struct ioapic_reprogram_data *drep = NULL; int rv, cpu; cpuset_t cpus; /* * Set cpu based on xen idea of online cpu's not apic tables. * Note that xen ignores/sets to it's own preferred value the * target cpu field when programming ioapic anyway. */ if ((cpu = xen_psm_bind_intr(irq)) == IRQ_UNBOUND) { CPUSET_ZERO(cpus); CPUSET_OR(cpus, xen_psm_cpus_online); } else { CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND); } apic_irq_table[irq]->airq_cpu = cpu; if (deferred) { drep = (struct ioapic_reprogram_data *)p; ASSERT(drep != NULL); irqptr = drep->irqp; } else { irqptr = (apic_irq_t *)p; } ASSERT(irqptr != NULL); rv = apic_rebind(irqptr, cpu, drep); if (rv) { /* CPU is not up or interrupt is disabled. Fall back to 0 */ cpu = 0; rv = apic_rebind(irqptr, cpu, drep); } /* * If rebind successful bind the irq to an event channel */ if (rv == 0) { ec_setup_pirq(irq, irqptr->airq_ipl, &cpus); CPUSET_FIND(cpus, cpu); apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND; } return (rv); } /* * Allocate a new vector for the given irq */ /* ARGSUSED */ uchar_t apic_modify_vector(uchar_t vector, int irq) { return (apic_allocate_vector(0, irq, 0)); } /* * The rest of the file is just generic psm module boilerplate */ static struct psm_ops xen_psm_ops = { xen_psm_probe, /* psm_probe */ xen_psm_softinit, /* psm_init */ xen_psm_picinit, /* psm_picinit */ xen_psm_intr_enter, /* psm_intr_enter */ xen_psm_intr_exit, /* psm_intr_exit */ xen_psm_setspl, /* psm_setspl */ xen_psm_addspl, /* psm_addspl */ xen_psm_delspl, /* psm_delspl */ xen_psm_disable_intr, /* psm_disable_intr */ xen_psm_enable_intr, /* psm_enable_intr */ (int (*)(int))NULL, /* psm_softlvl_to_irq */ (void (*)(int))NULL, /* psm_set_softintr */ (void (*)(processorid_t))NULL, /* psm_set_idlecpu */ (void (*)(processorid_t))NULL, /* psm_unset_idlecpu */ xen_psm_clkinit, /* psm_clkinit */ xen_psm_get_clockirq, /* psm_get_clockirq */ xen_psm_hrtimeinit, /* psm_hrtimeinit */ xpv_gethrtime, /* psm_gethrtime */ xen_psm_get_next_processorid, /* psm_get_next_processorid */ xen_psm_cpu_start, /* psm_cpu_start */ xen_psm_post_cpu_start, /* psm_post_cpu_start */ xen_psm_shutdown, /* psm_shutdown */ xen_psm_get_ipivect, /* psm_get_ipivect */ xen_psm_send_ipi, /* psm_send_ipi */ xen_psm_translate_irq, /* psm_translate_irq */ (void (*)(int, char *))NULL, /* psm_notify_error */ (void (*)(int msg))NULL, /* psm_notify_func */ xen_psm_timer_reprogram, /* psm_timer_reprogram */ xen_psm_timer_enable, /* psm_timer_enable */ xen_psm_timer_disable, /* psm_timer_disable */ (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ (void (*)(int, int))NULL, /* psm_preshutdown */ xen_intr_ops /* Advanced DDI Interrupt framework */ }; static struct psm_info xen_psm_info = { PSM_INFO_VER01_5, /* version */ PSM_OWN_EXCLUSIVE, /* ownership */ &xen_psm_ops, /* operation */ "xVM_psm", /* machine name */ "platform module %I%" /* machine descriptions */ }; static void *xen_psm_hdlp; int _init(void) { return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info)); } int _fini(void) { return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info)); } int _info(struct modinfo *modinfop) { return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop)); }