/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Logical domain channel devices are devices implemented entirely * in software; cnex is the nexus for channel-devices. They use * the HV channel interfaces via the LDC transport module to send * and receive data and to register callbacks. 
*/ #include <sys/types.h> #include <sys/cmn_err.h> #include <sys/conf.h> #include <sys/ddi.h> #include <sys/ddi_impldefs.h> #include <sys/devops.h> #include <sys/instance.h> #include <sys/modctl.h> #include <sys/open.h> #include <sys/stat.h> #include <sys/sunddi.h> #include <sys/sunndi.h> #include <sys/systm.h> #include <sys/mkdev.h> #include <sys/machsystm.h> #include <sys/intreg.h> #include <sys/intr.h> #include <sys/ddi_intr_impl.h> #include <sys/ivintr.h> #include <sys/hypervisor_api.h> #include <sys/ldc.h> #include <sys/cnex.h> #include <sys/mach_descrip.h> #include <sys/hsvc.h> #include <sys/sdt.h> /* * Internal functions/information */ static struct cnex_intr_map cnex_class_to_intr[] = { {LDC_DEV_GENERIC, PIL_3, 0}, {LDC_DEV_BLK, PIL_4, 10}, {LDC_DEV_BLK_SVC, PIL_3, 10}, {LDC_DEV_NT, PIL_6, 35}, {LDC_DEV_NT_SVC, PIL_4, 35}, {LDC_DEV_SERIAL, PIL_6, 0} }; #define CNEX_MAX_DEVS (sizeof (cnex_class_to_intr) / \ sizeof (cnex_class_to_intr[0])) #define CNEX_TX_INTR_WEIGHT 0 #define SUN4V_REG_SPEC2CFG_HDL(x) ((x >> 32) & ~(0xfull << 28)) static clock_t cnex_wait_usecs = 1000; /* wait time in usecs */ static int cnex_wait_retries = 3; static void *cnex_state; static uint_t cnex_intr_wrapper(caddr_t arg); static dev_info_t *cnex_find_chan_dip(dev_info_t *dip, uint64_t chan_id, md_t *mdp, mde_cookie_t mde); /* * Channel Interrupt Distribution * * In order to balance interrupts among available CPUs, we use * the intr_dist_cpuid_{add,remove}_device_weight() interface to * assign weights to channel interrupts. These weights, which are * defined in the cnex_intr_map structure, influence which CPU * is returned by intr_dist_cpuid() when called via the cnex * interrupt redistribution callback cnex_intr_redist(). * Interrupts for VIO devclass channels are given more weight than * other interrupts because they are expected to occur more * frequently and have a larger impact on overall performance. * Transmit interrupts are given a zero weight because they are * not used. 
* * The interrupt weights influence the target CPU selection when * interrupts are redistributed and when they are added. However, * removal of interrupts can unbalance the distribution even if * they are removed in converse order--compared to the order they * are added. This can occur when interrupts are removed after * redistribution occurs. * * Channel interrupt weights affect interrupt-CPU distribution * relative to other weighted interrupts on the system. For VIO * devclass channels, values are chosen to match those used by * the PCI express nexus driver for net and storage devices. */ static void cnex_intr_redist(void *arg, int32_t weight_max, int32_t weight); static int cnex_intr_new_cpu(cnex_soft_state_t *ssp, cnex_intr_t *iinfo); static int cnex_intr_dis_wait(cnex_soft_state_t *ssp, cnex_intr_t *iinfo); static int32_t cnex_class_weight(ldc_dev_t devclass); /* * Debug info */ #ifdef DEBUG /* * Print debug messages * * set cnexdbg to 0xf for enabling all msgs * 0x8 - Errors * 0x4 - Warnings * 0x2 - All debug messages * 0x1 - Minimal debug messages */ int cnexdbg = 0x8; static void cnexdebug(const char *fmt, ...) 
{ char buf[512]; va_list ap; va_start(ap, fmt); (void) vsprintf(buf, fmt, ap); va_end(ap); cmn_err(CE_CONT, "%s\n", buf); } #define D1 \ if (cnexdbg & 0x01) \ cnexdebug #define D2 \ if (cnexdbg & 0x02) \ cnexdebug #define DWARN \ if (cnexdbg & 0x04) \ cnexdebug #define DERR \ if (cnexdbg & 0x08) \ cnexdebug #else #define D1 #define D2 #define DWARN #define DERR #endif /* * Config information */ static int cnex_attach(dev_info_t *, ddi_attach_cmd_t); static int cnex_detach(dev_info_t *, ddi_detach_cmd_t); static int cnex_open(dev_t *, int, int, cred_t *); static int cnex_close(dev_t, int, int, cred_t *); static int cnex_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); static int cnex_ctl(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *, void *); static struct bus_ops cnex_bus_ops = { BUSO_REV, nullbusmap, /* bus_map */ NULL, /* bus_get_intrspec */ NULL, /* bus_add_intrspec */ NULL, /* bus_remove_intrspec */ i_ddi_map_fault, /* bus_map_fault */ ddi_no_dma_map, /* bus_dma_map */ ddi_no_dma_allochdl, /* bus_dma_allochdl */ NULL, /* bus_dma_freehdl */ NULL, /* bus_dma_bindhdl */ NULL, /* bus_dma_unbindhdl */ NULL, /* bus_dma_flush */ NULL, /* bus_dma_win */ NULL, /* bus_dma_ctl */ cnex_ctl, /* bus_ctl */ ddi_bus_prop_op, /* bus_prop_op */ 0, /* bus_get_eventcookie */ 0, /* bus_add_eventcall */ 0, /* bus_remove_eventcall */ 0, /* bus_post_event */ NULL, /* bus_intr_ctl */ NULL, /* bus_config */ NULL, /* bus_unconfig */ NULL, /* bus_fm_init */ NULL, /* bus_fm_fini */ NULL, /* bus_fm_access_enter */ NULL, /* bus_fm_access_exit */ NULL, /* bus_power */ NULL /* bus_intr_op */ }; static struct cb_ops cnex_cb_ops = { cnex_open, /* open */ cnex_close, /* close */ nodev, /* strategy */ nodev, /* print */ nodev, /* dump */ nodev, /* read */ nodev, /* write */ cnex_ioctl, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_MP | D_NEW | D_HOTPLUG /* Driver compatibility flag */ }; static 
struct dev_ops cnex_ops = { DEVO_REV, /* devo_rev, */ 0, /* refcnt */ ddi_getinfo_1to1, /* info */ nulldev, /* identify */ nulldev, /* probe */ cnex_attach, /* attach */ cnex_detach, /* detach */ nodev, /* reset */ &cnex_cb_ops, /* driver operations */ &cnex_bus_ops, /* bus operations */ nulldev, /* power */ ddi_quiesce_not_needed, /* quiesce */ }; /* * Module linkage information for the kernel. */ static struct modldrv modldrv = { &mod_driverops, "sun4v channel-devices nexus", &cnex_ops, }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; int _init(void) { int err; uint64_t majornum; uint64_t minornum; /* * Check HV intr group api versioning. * Note that cnex assumes interrupt cookies is * in version 1.0 of the intr group api. */ if ((err = hsvc_version(HSVC_GROUP_INTR, &majornum, &minornum)) != 0) { cmn_err(CE_WARN, "cnex: failed to get intr api " "group versioning errno=%d", err); return (err); } else if ((majornum != 1) && (majornum != 2)) { cmn_err(CE_WARN, "cnex: unsupported intr api group: " "maj:0x%lx, min:0x%lx", majornum, minornum); return (ENOTSUP); } if ((err = ddi_soft_state_init(&cnex_state, sizeof (cnex_soft_state_t), 0)) != 0) { return (err); } if ((err = mod_install(&modlinkage)) != 0) { ddi_soft_state_fini(&cnex_state); return (err); } return (0); } int _fini(void) { int err; if ((err = mod_remove(&modlinkage)) != 0) return (err); ddi_soft_state_fini(&cnex_state); return (0); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } /* * Callback function invoked by the interrupt redistribution * framework. This will redirect interrupts at CPUs that are * currently available in the system. * * Note: any interrupts with weight greater than or equal to * weight_max must be redistributed when this callback is * invoked with (weight == weight_max) which will be once per * redistribution. 
*/
/*ARGSUSED*/
static void
cnex_intr_redist(void *arg, int32_t weight_max, int32_t weight)
{
	cnex_soft_state_t	*ssp = arg;
	cnex_ldc_t		*chan;
	int			max_pass;

	ASSERT(ssp != NULL);

	/*
	 * On the pass where weight equals weight_max, interrupts that are
	 * heavier than weight are moved as well.
	 */
	max_pass = (weight_max == weight);

	mutex_enter(&ssp->clist_lock);

	/* Walk every registered channel, holding its lock while retargeting */
	for (chan = ssp->clist; chan != NULL; chan = chan->next) {

		mutex_enter(&chan->lock);

		/* Tx interrupt: only if a handler is installed */
		if (chan->tx.hdlr) {
			if (chan->tx.weight == weight ||
			    (max_pass && chan->tx.weight > weight)) {
				(void) cnex_intr_new_cpu(ssp, &chan->tx);
			}
		}

		/* Rx interrupt: same selection rule as Tx */
		if (chan->rx.hdlr) {
			if (chan->rx.weight == weight ||
			    (max_pass && chan->rx.weight > weight)) {
				(void) cnex_intr_new_cpu(ssp, &chan->rx);
			}
		}

		mutex_exit(&chan->lock);
	}

	mutex_exit(&ssp->clist_lock);
}

/*
 * Internal function to replace the CPU used by an interrupt
 * during interrupt redistribution.
 *
 * Returns 0 on success, or the non-zero value reported by
 * hvldc_intr_getvalid() or cnex_intr_dis_wait() when either fails.
 */
static int
cnex_intr_new_cpu(cnex_soft_state_t *ssp, cnex_intr_t *iinfo)
{
	int	valid;
	int	was_enabled;
	int	err;

	/* Find out whether the interrupt is currently enabled */
	err = hvldc_intr_getvalid(ssp->cfghdl, iinfo->ino, &valid);
	if (err != 0) {
		DWARN("cnex_intr_new_cpu: rx ino=0x%llx, can't get valid\n",
		    iinfo->ino);
		return (err);
	}
	was_enabled = (valid == HV_INTR_VALID);

	/* An enabled interrupt is disabled (and drained) before moving it */
	if (was_enabled) {
		err = cnex_intr_dis_wait(ssp, iinfo);
		if (err != 0) {
			return (err);
		}
	}

	/* Pick the new CPU, retarget the interrupt and account its weight */
	iinfo->cpuid = intr_dist_cpuid();
	(void) hvldc_intr_settarget(ssp->cfghdl, iinfo->ino, iinfo->cpuid);
	intr_dist_cpuid_add_device_weight(iinfo->cpuid, iinfo->dip,
	    iinfo->weight);

	/* Restore the enabled state we found on entry */
	if (was_enabled) {
		(void) hvldc_intr_setvalid(ssp->cfghdl, iinfo->ino,
		    HV_INTR_VALID);
	}

	return (0);
}

/*
 * Internal function to disable an interrupt and wait
 * for any pending interrupts to finish.
*/ static int cnex_intr_dis_wait(cnex_soft_state_t *ssp, cnex_intr_t *iinfo) { int rv, intr_state, retries; /* disable interrupts */ rv = hvldc_intr_setvalid(ssp->cfghdl, iinfo->ino, HV_INTR_NOTVALID); if (rv) { DWARN("cnex_intr_dis_wait: ino=0x%llx, can't set valid\n", iinfo->ino); return (ENXIO); } /* * Make a best effort to wait for pending interrupts * to finish. There is not much we can do if we timeout. */ retries = 0; do { rv = hvldc_intr_getstate(ssp->cfghdl, iinfo->ino, &intr_state); if (rv) { DWARN("cnex_intr_dis_wait: ino=0x%llx, can't get " "state\n", iinfo->ino); return (ENXIO); } if (intr_state != HV_INTR_DELIVERED_STATE) break; drv_usecwait(cnex_wait_usecs); } while (!panicstr && ++retries <= cnex_wait_retries); return (0); } /* * Returns the interrupt weight to use for the specified devclass. */ static int32_t cnex_class_weight(ldc_dev_t devclass) { int idx; for (idx = 0; idx < CNEX_MAX_DEVS; idx++) { if (devclass == cnex_class_to_intr[idx].devclass) { return (cnex_class_to_intr[idx].weight); } } /* * If this code is reached, the specified devclass is * invalid. New devclasses should be added to * cnex_class_to_intr. 
*/ ASSERT(0); return (0); } /* * Exported interface to register a LDC endpoint with * the channel nexus */ static int cnex_reg_chan(dev_info_t *dip, uint64_t id, ldc_dev_t devclass) { int idx; cnex_ldc_t *cldcp; int listsz, num_nodes, num_channels; md_t *mdp = NULL; mde_cookie_t rootnode, *listp = NULL; uint64_t tmp_id; uint64_t rxino = (uint64_t)-1; uint64_t txino = (uint64_t)-1; cnex_soft_state_t *cnex_ssp; int status, instance; dev_info_t *chan_dip = NULL; /* Get device instance and structure */ instance = ddi_get_instance(dip); cnex_ssp = ddi_get_soft_state(cnex_state, instance); /* Check to see if channel is already registered */ mutex_enter(&cnex_ssp->clist_lock); cldcp = cnex_ssp->clist; while (cldcp) { if (cldcp->id == id) { DWARN("cnex_reg_chan: channel 0x%llx exists\n", id); mutex_exit(&cnex_ssp->clist_lock); return (EINVAL); } cldcp = cldcp->next; } /* Get the Tx/Rx inos from the MD */ if ((mdp = md_get_handle()) == NULL) { DWARN("cnex_reg_chan: cannot init MD\n"); mutex_exit(&cnex_ssp->clist_lock); return (ENXIO); } num_nodes = md_node_count(mdp); ASSERT(num_nodes > 0); listsz = num_nodes * sizeof (mde_cookie_t); listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); rootnode = md_root_node(mdp); /* search for all channel_endpoint nodes */ num_channels = md_scan_dag(mdp, rootnode, md_find_name(mdp, "channel-endpoint"), md_find_name(mdp, "fwd"), listp); if (num_channels <= 0) { DWARN("cnex_reg_chan: invalid channel id\n"); kmem_free(listp, listsz); (void) md_fini_handle(mdp); mutex_exit(&cnex_ssp->clist_lock); return (EINVAL); } for (idx = 0; idx < num_channels; idx++) { /* Get the channel ID */ status = md_get_prop_val(mdp, listp[idx], "id", &tmp_id); if (status) { DWARN("cnex_reg_chan: cannot read LDC ID\n"); kmem_free(listp, listsz); (void) md_fini_handle(mdp); mutex_exit(&cnex_ssp->clist_lock); return (ENXIO); } if (tmp_id != id) continue; /* Get the Tx and Rx ino */ status = md_get_prop_val(mdp, listp[idx], "tx-ino", &txino); if (status) { 
DWARN("cnex_reg_chan: cannot read Tx ino\n"); kmem_free(listp, listsz); (void) md_fini_handle(mdp); mutex_exit(&cnex_ssp->clist_lock); return (ENXIO); } status = md_get_prop_val(mdp, listp[idx], "rx-ino", &rxino); if (status) { DWARN("cnex_reg_chan: cannot read Rx ino\n"); kmem_free(listp, listsz); (void) md_fini_handle(mdp); mutex_exit(&cnex_ssp->clist_lock); return (ENXIO); } chan_dip = cnex_find_chan_dip(dip, id, mdp, listp[idx]); ASSERT(chan_dip != NULL); } kmem_free(listp, listsz); (void) md_fini_handle(mdp); /* * check to see if we looped through the list of channel IDs without * matching one (i.e. an 'ino' has not been initialised). */ if ((rxino == -1) || (txino == -1)) { DERR("cnex_reg_chan: no ID matching '%llx' in MD\n", id); mutex_exit(&cnex_ssp->clist_lock); return (ENOENT); } /* Allocate a new channel structure */ cldcp = kmem_zalloc(sizeof (*cldcp), KM_SLEEP); /* Initialize the channel */ mutex_init(&cldcp->lock, NULL, MUTEX_DRIVER, NULL); cldcp->id = id; cldcp->tx.ino = txino; cldcp->rx.ino = rxino; cldcp->devclass = devclass; cldcp->tx.weight = CNEX_TX_INTR_WEIGHT; cldcp->rx.weight = cnex_class_weight(devclass); cldcp->dip = chan_dip; /* add channel to nexus channel list */ cldcp->next = cnex_ssp->clist; cnex_ssp->clist = cldcp; mutex_exit(&cnex_ssp->clist_lock); return (0); } /* * Add Tx/Rx interrupt handler for the channel */ static int cnex_add_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype, uint_t (*hdlr)(), caddr_t arg1, caddr_t arg2) { int rv, idx, pil; cnex_ldc_t *cldcp; cnex_intr_t *iinfo; cnex_soft_state_t *cnex_ssp; int instance; /* Get device instance and structure */ instance = ddi_get_instance(dip); cnex_ssp = ddi_get_soft_state(cnex_state, instance); /* get channel info */ mutex_enter(&cnex_ssp->clist_lock); cldcp = cnex_ssp->clist; while (cldcp) { if (cldcp->id == id) break; cldcp = cldcp->next; } if (cldcp == NULL) { DWARN("cnex_add_intr: channel 0x%llx does not exist\n", id); mutex_exit(&cnex_ssp->clist_lock); return 
(EINVAL); } mutex_exit(&cnex_ssp->clist_lock); /* get channel lock */ mutex_enter(&cldcp->lock); /* get interrupt type */ if (itype == CNEX_TX_INTR) { iinfo = &(cldcp->tx); } else if (itype == CNEX_RX_INTR) { iinfo = &(cldcp->rx); } else { DWARN("cnex_add_intr: invalid interrupt type\n", id); mutex_exit(&cldcp->lock); return (EINVAL); } /* check if a handler is already added */ if (iinfo->hdlr != 0) { DWARN("cnex_add_intr: interrupt handler exists\n"); mutex_exit(&cldcp->lock); return (EINVAL); } /* save interrupt handler info */ iinfo->hdlr = hdlr; iinfo->arg1 = arg1; iinfo->arg2 = arg2; /* save data for DTrace probes used by intrstat(1m) */ iinfo->dip = cldcp->dip; iinfo->id = cldcp->id; iinfo->icookie = MINVINTR_COOKIE + iinfo->ino; /* * Verify that the ino does not generate a cookie which * is outside the (MINVINTR_COOKIE, MAXIVNUM) range of the * system interrupt table. */ if (iinfo->icookie >= MAXIVNUM || iinfo->icookie < MINVINTR_COOKIE) { DWARN("cnex_add_intr: invalid cookie %x ino %x\n", iinfo->icookie, iinfo->ino); mutex_exit(&cldcp->lock); return (EINVAL); } D1("cnex_add_intr: add hdlr, cfghdl=0x%llx, ino=0x%llx, " "cookie=0x%llx\n", cnex_ssp->cfghdl, iinfo->ino, iinfo->icookie); /* Pick a PIL on the basis of the channel's devclass */ for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) { if (cldcp->devclass == cnex_class_to_intr[idx].devclass) { pil = cnex_class_to_intr[idx].pil; break; } } /* add interrupt to solaris ivec table */ if (add_ivintr(iinfo->icookie, pil, (intrfunc)cnex_intr_wrapper, (caddr_t)iinfo, NULL, NULL) != 0) { DWARN("cnex_add_intr: add_ivintr fail cookie %x ino %x\n", iinfo->icookie, iinfo->ino); mutex_exit(&cldcp->lock); return (EINVAL); } /* set the cookie in the HV */ rv = hvldc_intr_setcookie(cnex_ssp->cfghdl, iinfo->ino, iinfo->icookie); /* pick next CPU in the domain for this channel */ iinfo->cpuid = intr_dist_cpuid(); /* set the target CPU and then enable interrupts */ rv = hvldc_intr_settarget(cnex_ssp->cfghdl, 
iinfo->ino, iinfo->cpuid); if (rv) { DWARN("cnex_add_intr: ino=0x%llx, cannot set target cpu\n", iinfo->ino); goto hv_error; } rv = hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino, HV_INTR_IDLE_STATE); if (rv) { DWARN("cnex_add_intr: ino=0x%llx, cannot set state\n", iinfo->ino); goto hv_error; } rv = hvldc_intr_setvalid(cnex_ssp->cfghdl, iinfo->ino, HV_INTR_VALID); if (rv) { DWARN("cnex_add_intr: ino=0x%llx, cannot set valid\n", iinfo->ino); goto hv_error; } intr_dist_cpuid_add_device_weight(iinfo->cpuid, iinfo->dip, iinfo->weight); mutex_exit(&cldcp->lock); return (0); hv_error: (void) rem_ivintr(iinfo->icookie, pil); mutex_exit(&cldcp->lock); return (ENXIO); } /* * Exported interface to unregister a LDC endpoint with * the channel nexus */ static int cnex_unreg_chan(dev_info_t *dip, uint64_t id) { cnex_ldc_t *cldcp, *prev_cldcp; cnex_soft_state_t *cnex_ssp; int instance; /* Get device instance and structure */ instance = ddi_get_instance(dip); cnex_ssp = ddi_get_soft_state(cnex_state, instance); /* find and remove channel from list */ mutex_enter(&cnex_ssp->clist_lock); prev_cldcp = NULL; cldcp = cnex_ssp->clist; while (cldcp) { if (cldcp->id == id) break; prev_cldcp = cldcp; cldcp = cldcp->next; } if (cldcp == 0) { DWARN("cnex_unreg_chan: invalid channel %d\n", id); mutex_exit(&cnex_ssp->clist_lock); return (EINVAL); } if (cldcp->tx.hdlr || cldcp->rx.hdlr) { DWARN("cnex_unreg_chan: handlers still exist: chan %lx\n", id); mutex_exit(&cnex_ssp->clist_lock); return (ENXIO); } if (prev_cldcp) prev_cldcp->next = cldcp->next; else cnex_ssp->clist = cldcp->next; mutex_exit(&cnex_ssp->clist_lock); /* destroy mutex */ mutex_destroy(&cldcp->lock); /* free channel */ kmem_free(cldcp, sizeof (*cldcp)); return (0); } /* * Remove Tx/Rx interrupt handler for the channel */ static int cnex_rem_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype) { int rv, idx, pil; cnex_ldc_t *cldcp; cnex_intr_t *iinfo; cnex_soft_state_t *cnex_ssp; int instance, istate; /* Get device 
instance and structure */ instance = ddi_get_instance(dip); cnex_ssp = ddi_get_soft_state(cnex_state, instance); /* get channel info */ mutex_enter(&cnex_ssp->clist_lock); cldcp = cnex_ssp->clist; while (cldcp) { if (cldcp->id == id) break; cldcp = cldcp->next; } if (cldcp == NULL) { DWARN("cnex_rem_intr: channel 0x%llx does not exist\n", id); mutex_exit(&cnex_ssp->clist_lock); return (EINVAL); } mutex_exit(&cnex_ssp->clist_lock); /* get rid of the channel intr handler */ mutex_enter(&cldcp->lock); /* get interrupt type */ if (itype == CNEX_TX_INTR) { iinfo = &(cldcp->tx); } else if (itype == CNEX_RX_INTR) { iinfo = &(cldcp->rx); } else { DWARN("cnex_rem_intr: invalid interrupt type\n"); mutex_exit(&cldcp->lock); return (EINVAL); } D1("cnex_rem_intr: interrupt ino=0x%x\n", iinfo->ino); /* check if a handler is already added */ if (iinfo->hdlr == 0) { DWARN("cnex_rem_intr: interrupt handler does not exist\n"); mutex_exit(&cldcp->lock); return (EINVAL); } D1("cnex_rem_intr: set intr to invalid ino=0x%x\n", iinfo->ino); rv = hvldc_intr_setvalid(cnex_ssp->cfghdl, iinfo->ino, HV_INTR_NOTVALID); if (rv) { DWARN("cnex_rem_intr: cannot set valid ino=%x\n", iinfo->ino); mutex_exit(&cldcp->lock); return (ENXIO); } /* * Check if there are pending interrupts. If interrupts are * pending return EAGAIN. 
*/ rv = hvldc_intr_getstate(cnex_ssp->cfghdl, iinfo->ino, &istate); if (rv) { DWARN("cnex_rem_intr: ino=0x%llx, cannot get state\n", iinfo->ino); mutex_exit(&cldcp->lock); return (ENXIO); } /* if interrupts are still pending print warning */ if (istate != HV_INTR_IDLE_STATE) { DWARN("cnex_rem_intr: cannot remove intr busy ino=%x\n", iinfo->ino); mutex_exit(&cldcp->lock); return (EAGAIN); } /* Pick a PIL on the basis of the channel's devclass */ for (idx = 0, pil = PIL_3; idx < CNEX_MAX_DEVS; idx++) { if (cldcp->devclass == cnex_class_to_intr[idx].devclass) { pil = cnex_class_to_intr[idx].pil; break; } } intr_dist_cpuid_rem_device_weight(iinfo->cpuid, iinfo->dip); /* remove interrupt */ (void) rem_ivintr(iinfo->icookie, pil); /* clear interrupt info */ bzero(iinfo, sizeof (*iinfo)); mutex_exit(&cldcp->lock); return (0); } /* * Clear pending Tx/Rx interrupt */ static int cnex_clr_intr(dev_info_t *dip, uint64_t id, cnex_intrtype_t itype) { int rv; cnex_ldc_t *cldcp; cnex_intr_t *iinfo; cnex_soft_state_t *cnex_ssp; int instance; /* Get device instance and structure */ instance = ddi_get_instance(dip); cnex_ssp = ddi_get_soft_state(cnex_state, instance); /* get channel info */ mutex_enter(&cnex_ssp->clist_lock); cldcp = cnex_ssp->clist; while (cldcp) { if (cldcp->id == id) break; cldcp = cldcp->next; } if (cldcp == NULL) { DWARN("cnex_clr_intr: channel 0x%llx does not exist\n", id); mutex_exit(&cnex_ssp->clist_lock); return (EINVAL); } mutex_exit(&cnex_ssp->clist_lock); mutex_enter(&cldcp->lock); /* get interrupt type */ if (itype == CNEX_TX_INTR) { iinfo = &(cldcp->tx); } else if (itype == CNEX_RX_INTR) { iinfo = &(cldcp->rx); } else { DWARN("cnex_clr_intr: invalid interrupt type\n"); mutex_exit(&cldcp->lock); return (EINVAL); } D1("%s: interrupt ino=0x%x\n", __func__, iinfo->ino); /* check if a handler is already added */ if (iinfo->hdlr == 0) { DWARN("cnex_clr_intr: interrupt handler does not exist\n"); mutex_exit(&cldcp->lock); return (EINVAL); } rv = 
hvldc_intr_setstate(cnex_ssp->cfghdl, iinfo->ino, HV_INTR_IDLE_STATE); if (rv) { DWARN("cnex_clr_intr: cannot clear interrupt state\n"); mutex_exit(&cldcp->lock); return (ENXIO); } mutex_exit(&cldcp->lock); return (0); } /* * Channel nexus interrupt handler wrapper */ static uint_t cnex_intr_wrapper(caddr_t arg) { int res; uint_t (*handler)(); caddr_t handler_arg1; caddr_t handler_arg2; cnex_intr_t *iinfo = (cnex_intr_t *)arg; ASSERT(iinfo != NULL); handler = iinfo->hdlr; handler_arg1 = iinfo->arg1; handler_arg2 = iinfo->arg2; /* * The 'interrupt__start' and 'interrupt__complete' probes * are provided to support 'intrstat' command. These probes * help monitor the interrupts on a per device basis only. * In order to provide the ability to monitor the * activity on a per channel basis, two additional * probes('channelintr__start','channelintr__complete') * are provided here. */ DTRACE_PROBE4(channelintr__start, uint64_t, iinfo->id, cnex_intr_t *, iinfo, void *, handler, caddr_t, handler_arg1); DTRACE_PROBE4(interrupt__start, dev_info_t, iinfo->dip, void *, handler, caddr_t, handler_arg1, caddr_t, handler_arg2); D1("cnex_intr_wrapper:ino=0x%llx invoke client handler\n", iinfo->ino); res = (*handler)(handler_arg1, handler_arg2); DTRACE_PROBE4(interrupt__complete, dev_info_t, iinfo->dip, void *, handler, caddr_t, handler_arg1, int, res); DTRACE_PROBE4(channelintr__complete, uint64_t, iinfo->id, cnex_intr_t *, iinfo, void *, handler, caddr_t, handler_arg1); return (res); } /*ARGSUSED*/ static int cnex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { int rv, instance, reglen; cnex_regspec_t *reg_p; ldc_cnex_t cinfo; cnex_soft_state_t *cnex_ssp; switch (cmd) { case DDI_ATTACH: break; case DDI_RESUME: return (DDI_SUCCESS); default: return (DDI_FAILURE); } /* * Get the instance specific soft state structure. * Save the devi for this instance in the soft_state data. 
*/ instance = ddi_get_instance(devi); if (ddi_soft_state_zalloc(cnex_state, instance) != DDI_SUCCESS) return (DDI_FAILURE); cnex_ssp = ddi_get_soft_state(cnex_state, instance); cnex_ssp->devi = devi; cnex_ssp->clist = NULL; if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "reg", (caddr_t)®_p, ®len) != DDI_SUCCESS) { return (DDI_FAILURE); } /* get the sun4v config handle for this device */ cnex_ssp->cfghdl = SUN4V_REG_SPEC2CFG_HDL(reg_p->physaddr); kmem_free(reg_p, reglen); D1("cnex_attach: cfghdl=0x%llx\n", cnex_ssp->cfghdl); /* init channel list mutex */ mutex_init(&cnex_ssp->clist_lock, NULL, MUTEX_DRIVER, NULL); /* Register with LDC module */ cinfo.dip = devi; cinfo.reg_chan = cnex_reg_chan; cinfo.unreg_chan = cnex_unreg_chan; cinfo.add_intr = cnex_add_intr; cinfo.rem_intr = cnex_rem_intr; cinfo.clr_intr = cnex_clr_intr; /* * LDC register will fail if an nexus instance had already * registered with the LDC framework */ rv = ldc_register(&cinfo); if (rv) { DWARN("cnex_attach: unable to register with LDC\n"); ddi_soft_state_free(cnex_state, instance); mutex_destroy(&cnex_ssp->clist_lock); return (DDI_FAILURE); } if (ddi_create_minor_node(devi, "devctl", S_IFCHR, instance, DDI_NT_NEXUS, 0) != DDI_SUCCESS) { ddi_remove_minor_node(devi, NULL); ddi_soft_state_free(cnex_state, instance); mutex_destroy(&cnex_ssp->clist_lock); return (DDI_FAILURE); } /* Add interrupt redistribution callback. 
*/ intr_dist_add_weighted(cnex_intr_redist, cnex_ssp); ddi_report_dev(devi); return (DDI_SUCCESS); } /*ARGSUSED*/ static int cnex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) { int instance; ldc_cnex_t cinfo; cnex_soft_state_t *cnex_ssp; switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } instance = ddi_get_instance(devi); cnex_ssp = ddi_get_soft_state(cnex_state, instance); /* check if there are any channels still registered */ if (cnex_ssp->clist) { cmn_err(CE_WARN, "?cnex_dettach: channels registered %d\n", ddi_get_instance(devi)); return (DDI_FAILURE); } /* Unregister with LDC module */ cinfo.dip = devi; (void) ldc_unregister(&cinfo); /* Remove interrupt redistribution callback. */ intr_dist_rem_weighted(cnex_intr_redist, cnex_ssp); /* destroy mutex */ mutex_destroy(&cnex_ssp->clist_lock); /* free soft state structure */ ddi_soft_state_free(cnex_state, instance); return (DDI_SUCCESS); } /*ARGSUSED*/ static int cnex_open(dev_t *devp, int flags, int otyp, cred_t *credp) { int instance; if (otyp != OTYP_CHR) return (EINVAL); instance = getminor(*devp); if (ddi_get_soft_state(cnex_state, instance) == NULL) return (ENXIO); return (0); } /*ARGSUSED*/ static int cnex_close(dev_t dev, int flags, int otyp, cred_t *credp) { int instance; if (otyp != OTYP_CHR) return (EINVAL); instance = getminor(dev); if (ddi_get_soft_state(cnex_state, instance) == NULL) return (ENXIO); return (0); } /*ARGSUSED*/ static int cnex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_p) { int instance; cnex_soft_state_t *cnex_ssp; instance = getminor(dev); if ((cnex_ssp = ddi_get_soft_state(cnex_state, instance)) == NULL) return (ENXIO); ASSERT(cnex_ssp->devi); return (ndi_devctl_ioctl(cnex_ssp->devi, cmd, arg, mode, 0)); } static int cnex_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop, void *arg, void *result) { char name[MAXNAMELEN]; uint32_t reglen; int *cnex_regspec; switch (ctlop) { case 
DDI_CTLOPS_REPORTDEV: if (rdip == NULL) return (DDI_FAILURE); cmn_err(CE_CONT, "?channel-device: %s%d\n", ddi_driver_name(rdip), ddi_get_instance(rdip)); return (DDI_SUCCESS); case DDI_CTLOPS_INITCHILD: { dev_info_t *child = (dev_info_t *)arg; if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS, "reg", &cnex_regspec, ®len) != DDI_SUCCESS) { return (DDI_FAILURE); } (void) snprintf(name, sizeof (name), "%x", *cnex_regspec); ddi_set_name_addr(child, name); ddi_set_parent_data(child, NULL); ddi_prop_free(cnex_regspec); return (DDI_SUCCESS); } case DDI_CTLOPS_UNINITCHILD: { dev_info_t *child = (dev_info_t *)arg; NDI_CONFIG_DEBUG((CE_NOTE, "DDI_CTLOPS_UNINITCHILD(%s, instance=%d)", ddi_driver_name(child), DEVI(child)->devi_instance)); ddi_set_name_addr(child, NULL); return (DDI_SUCCESS); } case DDI_CTLOPS_DMAPMAPC: case DDI_CTLOPS_REPORTINT: case DDI_CTLOPS_REGSIZE: case DDI_CTLOPS_NREGS: case DDI_CTLOPS_SIDDEV: case DDI_CTLOPS_SLAVEONLY: case DDI_CTLOPS_AFFINITY: case DDI_CTLOPS_POKE: case DDI_CTLOPS_PEEK: /* * These ops correspond to functions that "shouldn't" be called * by a channel-device driver. So we whine when we're called. */ cmn_err(CE_WARN, "%s%d: invalid op (%d) from %s%d\n", ddi_driver_name(dip), ddi_get_instance(dip), ctlop, ddi_driver_name(rdip), ddi_get_instance(rdip)); return (DDI_FAILURE); case DDI_CTLOPS_ATTACH: case DDI_CTLOPS_BTOP: case DDI_CTLOPS_BTOPR: case DDI_CTLOPS_DETACH: case DDI_CTLOPS_DVMAPAGESIZE: case DDI_CTLOPS_IOMIN: case DDI_CTLOPS_POWER: case DDI_CTLOPS_PTOB: default: /* * Everything else (e.g. PTOB/BTOP/BTOPR requests) we pass up */ return (ddi_ctlops(dip, rdip, ctlop, arg, result)); } } /* * cnex_find_chan_dip -- Find the dip of a device that is corresponding * to the specific channel. Below are the details on how the dip * is derived. * * - In the MD, the cfg-handle is expected to be unique for * virtual-device nodes that have the same 'name' property value. 
* This value is expected to be the same as that of "reg" property * of the corresponding OBP device node. * * - The value of the 'name' property of a virtual-device node * in the MD is expected to be the same for the corresponding * OBP device node. * * - Find the virtual-device node corresponding to a channel-endpoint * by walking backwards. Then obtain the values for the 'name' and * 'cfg-handle' properties. * * - Walk all the children of the cnex, find a matching dip which * has the same 'name' and 'reg' property values. * * - The channels that have no corresponding device driver are * treated as if they correspond to the cnex driver, * that is, return cnex dip for them. This means, the * cnex acts as an umbrella device driver. Note, this is * for 'intrstat' statistics purposes only. As a result of this, * the 'intrstat' shows cnex as the device that is servicing the * interrupts corresponding to these channels. * * For now, only one such case is known, that is, the channels that * are used by the "domain-services". 
*/
static dev_info_t *
cnex_find_chan_dip(dev_info_t *dip, uint64_t chan_id,
    md_t *mdp, mde_cookie_t mde)
{
	int listsz;
	int num_nodes;
	int num_devs;
	uint64_t cfghdl;
	char *md_name;
	mde_cookie_t *listp;
	dev_info_t *cdip = NULL;

	/* scratch list sized for the worst case: every node in the MD */
	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);

	/* walk backwards from the channel-endpoint to its virtual-device */
	num_devs = md_scan_dag(mdp, mde, md_find_name(mdp, "virtual-device"),
	    md_find_name(mdp, "back"), listp);
	ASSERT(num_devs <= 1);
	if (num_devs <= 0) {
		DWARN("cnex_find_chan_dip:channel(0x%llx): "
		    "No virtual-device found\n", chan_id);
		goto fdip_exit;
	}
	if (md_get_prop_str(mdp, listp[0], "name", &md_name) != 0) {
		DWARN("cnex_find_chan_dip:channel(0x%llx): "
		    "name property not found\n", chan_id);
		goto fdip_exit;
	}

	D1("cnex_find_chan_dip: channel(0x%llx): virtual-device "
	    "name property value = %s\n", chan_id, md_name);

	if (md_get_prop_val(mdp, listp[0], "cfg-handle", &cfghdl) != 0) {
		DWARN("cnex_find_chan_dip:channel(0x%llx): virtual-device's "
		    "cfg-handle property not found\n", chan_id);
		goto fdip_exit;
	}

	D1("cnex_find_chan_dip:channel(0x%llx): virtual-device cfg-handle "
	    " property value = 0x%llx\n", chan_id, cfghdl);

	/*
	 * Look for a child of the nexus whose "name" matches the MD name
	 * and whose first "reg" cell matches the MD cfg-handle. The loop
	 * leaves cdip NULL when no child matches.
	 */
	for (cdip = ddi_get_child(dip); cdip != NULL;
	    cdip = ddi_get_next_sibling(cdip)) {

		int *cnex_regspec;
		uint32_t reglen;
		char	*dev_name;

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, "name",
		    &dev_name) != DDI_PROP_SUCCESS) {
			DWARN("cnex_find_chan_dip: name property not"
			    " found for dip(0x%p)\n", cdip);
			continue;
		}
		if (strcmp(md_name, dev_name) != 0) {
			ddi_prop_free(dev_name);
			continue;
		}
		ddi_prop_free(dev_name);
		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, "reg",
		    &cnex_regspec, &reglen) != DDI_SUCCESS) {
			DWARN("cnex_find_chan_dip: reg property not"
			    " found for dip(0x%p)\n", cdip);
			continue;
		}
		if (*cnex_regspec == cfghdl) {
			D1("cnex_find_chan_dip:channel(0x%llx): found "
			    "dip(0x%p) drvname=%s\n", chan_id, cdip,
			    ddi_driver_name(cdip));
			ddi_prop_free(cnex_regspec);
			break;
		}
		ddi_prop_free(cnex_regspec);
	}

fdip_exit:
	if (cdip == NULL) {
		/*
		 * If a virtual-device node exists but no dip found,
		 * then for now print a DEBUG error message only.
		 */
		if (num_devs > 0) {
			DERR("cnex_find_chan_dip:channel(0x%llx): "
			    "No device found\n", chan_id);
		}

		/* If no dip was found, return cnex device's dip. */
		cdip = dip;
	}

	kmem_free(listp, listsz);
	D1("cnex_find_chan_dip:channel(0x%llx): returning dip=0x%p\n",
	    chan_id, cdip);
	return (cdip);
}

/* -------------------------------------------------------------------------- */