/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function prototypes */ static int mc_open(dev_t *, int, int, cred_t *); static int mc_close(dev_t, int, int, cred_t *); static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); static int mc_attach(dev_info_t *, ddi_attach_cmd_t); static int mc_detach(dev_info_t *, ddi_detach_cmd_t); /* * Configuration data structures */ static struct cb_ops mc_cb_ops = { mc_open, /* open */ mc_close, /* close */ nulldev, /* strategy */ nulldev, /* print */ nodev, /* dump */ nulldev, /* read */ nulldev, /* write */ mc_ioctl, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flag */ CB_REV, /* rev */ nodev, /* cb_aread */ nodev /* cb_awrite */ }; static struct dev_ops mc_ops = { DEVO_REV, /* rev */ 0, /* 
refcnt */ ddi_getinfo_1to1, /* getinfo */ nulldev, /* identify */ nulldev, /* probe */ mc_attach, /* attach */ mc_detach, /* detach */ nulldev, /* reset */ &mc_cb_ops, /* cb_ops */ (struct bus_ops *)0, /* bus_ops */ nulldev /* power */ }; /* * Driver globals */ static void *mcp; static int nmcs = 0; static int seg_id = 0; static int nsegments = 0; static uint64_t memsize = 0; static int maxbanks = 0; static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail; static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail; static mc_dlist_t *device_head, *device_tail; static kmutex_t mcmutex; static kmutex_t mcdatamutex; static krwlock_t mcdimmsids_rw; /* pointer to cache of DIMM serial ids */ static dimm_sid_cache_t *mc_dimm_sids; static int max_entries; extern struct mod_ops mod_driverops; static struct modldrv modldrv = { &mod_driverops, /* module type, this one is a driver */ "Memory-controller", /* module name */ &mc_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, /* rev */ (void *)&modldrv, NULL }; static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp); static int mc_get_mem_info(int synd_code, uint64_t paddr, uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp, int *mcidp); static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp); static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp); static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr); static int mc_init_sid_cache(void); static int mc_get_mcregs(struct mc_soft_state *); static void mc_construct(int mc_id, void *dimminfop); static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop); static void mlayout_del(int mc_id, int delete); static struct seg_info *seg_match_base(u_longlong_t base); static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail); static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, 
mc_dlist_t **tail); static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head); static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm); static int mc_populate_sid_cache(void); static int mc_get_sid_cache_index(int mcid); static void mc_update_bank(struct bank_info *bank); #pragma weak p2get_mem_unum #pragma weak p2get_mem_info #pragma weak p2get_mem_sid #pragma weak p2get_mem_offset #pragma weak p2get_mem_addr #pragma weak p2init_sid_cache #pragma weak plat_add_mem_unum_label #pragma weak plat_alloc_sid_cache #pragma weak plat_populate_sid_cache #define QWORD_SIZE 144 #define QWORD_SIZE_BYTES (QWORD_SIZE / 8) /* * These are the module initialization routines. */ int _init(void) { int error; if ((error = ddi_soft_state_init(&mcp, sizeof (struct mc_soft_state), 1)) != 0) return (error); error = mod_install(&modlinkage); if (error == 0) { mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL); rw_init(&mcdimmsids_rw, NULL, RW_DRIVER, NULL); } return (error); } int _fini(void) { int error; if ((error = mod_remove(&modlinkage)) != 0) return (error); ddi_soft_state_fini(&mcp); mutex_destroy(&mcmutex); mutex_destroy(&mcdatamutex); rw_destroy(&mcdimmsids_rw); if (mc_dimm_sids) kmem_free(mc_dimm_sids, sizeof (dimm_sid_cache_t) * max_entries); return (0); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } static int mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { struct mc_soft_state *softsp; struct dimm_info *dimminfop; int instance, len, err; /* get the instance of this devi */ instance = ddi_get_instance(devi); switch (cmd) { case DDI_ATTACH: break; case DDI_RESUME: /* get the soft state pointer for this device node */ softsp = ddi_get_soft_state(mcp, instance); DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n", instance)); /* * During resume, the source and target board's bank_infos * need to be updated with the new mc MADR values. 
This is * implemented with existing functionality by first removing * the props and allocated data structs, and then adding them * back in. */ if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, MEM_CFG_PROP_NAME) == 1) { (void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip, MEM_CFG_PROP_NAME); } mlayout_del(softsp->portid, 0); if (mc_get_mcregs(softsp) == -1) { cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n", instance); } return (DDI_SUCCESS); default: return (DDI_FAILURE); } if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS) return (DDI_FAILURE); softsp = ddi_get_soft_state(mcp, instance); /* Set the dip in the soft state */ softsp->dip = devi; if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS, "portid", -1)) == -1) { DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property", instance, "portid")); goto bad; } DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n", instance, softsp->portid, CPU->cpu_id)); /* map in the registers for this device. */ if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) { DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers", instance)); goto bad; } /* * Get the label of dimms and pin routing information at memory-layout * property if the memory controller is enabled. * * Basically every memory-controller node on every machine should * have one of these properties unless the memory controller is * physically not capable of having memory attached to it, e.g. * Excalibur's slave processor. */ err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS, "memory-layout", (caddr_t)&dimminfop, &len); if (err == DDI_PROP_SUCCESS) { /* * Set the pointer and size of property in the soft state */ softsp->memlayoutp = dimminfop; softsp->size = len; } else if (err == DDI_PROP_NOT_FOUND) { /* * This is a disable MC. 
Clear out the pointer and size * of property in the soft state */ softsp->memlayoutp = NULL; softsp->size = 0; } else { DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n", instance, (void *)dimminfop)); goto bad2; } DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n", instance, (void *)dimminfop, *(uint64_t *)dimminfop, len)); /* Get MC registers and construct all needed data structure */ if (mc_get_mcregs(softsp) == -1) goto bad1; mutex_enter(&mcmutex); if (nmcs == 1) { if (&p2get_mem_unum) p2get_mem_unum = mc_get_mem_unum; if (&p2get_mem_info) p2get_mem_info = mc_get_mem_info; if (&p2get_mem_sid) p2get_mem_sid = mc_get_mem_sid; if (&p2get_mem_offset) p2get_mem_offset = mc_get_mem_offset; if (&p2get_mem_addr) p2get_mem_addr = mc_get_mem_addr; if (&p2init_sid_cache) p2init_sid_cache = mc_init_sid_cache; } mutex_exit(&mcmutex); /* * Update DIMM serial id information if the DIMM serial id * cache has already been initialized. */ if (mc_dimm_sids) { rw_enter(&mcdimmsids_rw, RW_WRITER); (void) mc_populate_sid_cache(); rw_exit(&mcdimmsids_rw); } if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance, "ddi_mem_ctrl", 0) != DDI_SUCCESS) { DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node" " failed \n")); goto bad1; } ddi_report_dev(devi); return (DDI_SUCCESS); bad1: /* release all allocated data struture for this MC */ mlayout_del(softsp->portid, 0); if (softsp->memlayoutp != NULL) kmem_free(softsp->memlayoutp, softsp->size); /* remove the libdevinfo property */ if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, MEM_CFG_PROP_NAME) == 1) { (void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip, MEM_CFG_PROP_NAME); } bad2: /* unmap the registers for this device. 
*/ ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0); bad: ddi_soft_state_free(mcp, instance); return (DDI_FAILURE); } /* ARGSUSED */ static int mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) { int instance; struct mc_soft_state *softsp; /* get the instance of this devi */ instance = ddi_get_instance(devi); /* get the soft state pointer for this device node */ softsp = ddi_get_soft_state(mcp, instance); switch (cmd) { case DDI_SUSPEND: return (DDI_SUCCESS); case DDI_DETACH: break; default: return (DDI_FAILURE); } DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n", instance, softsp->portid, softsp->memlayoutp)); /* remove the libdevinfo property */ if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, MEM_CFG_PROP_NAME) == 1) { (void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip, MEM_CFG_PROP_NAME); } /* release all allocated data struture for this MC */ mlayout_del(softsp->portid, 1); if (softsp->memlayoutp != NULL) kmem_free(softsp->memlayoutp, softsp->size); /* unmap the registers */ ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0); mutex_enter(&mcmutex); if (nmcs == 0) { if (&p2get_mem_unum) p2get_mem_unum = NULL; if (&p2get_mem_info) p2get_mem_info = NULL; if (&p2get_mem_sid) p2get_mem_sid = NULL; if (&p2get_mem_offset) p2get_mem_offset = NULL; if (&p2get_mem_addr) p2get_mem_addr = NULL; if (&p2init_sid_cache) p2init_sid_cache = NULL; } mutex_exit(&mcmutex); ddi_remove_minor_node(devi, NULL); /* free up the soft state */ ddi_soft_state_free(mcp, instance); return (DDI_SUCCESS); } /* ARGSUSED */ static int mc_open(dev_t *devp, int flag, int otyp, cred_t *credp) { /* verify that otyp is appropriate */ if (otyp != OTYP_CHR) { return (EINVAL); } return (0); } /* ARGSUSED */ static int mc_close(dev_t devp, int flag, int otyp, cred_t *credp) { return (0); } /* * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP, * MCIOC_CTRLCONF, MCIOC_CONTROL. 
* * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are * associated with various length struct. If given number is less than the * number in kernel, update the number and return EINVAL so that user could * allocate enough space for it. * */ /* ARGSUSED */ static int mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p) { size_t size; struct mc_memconf mcmconf; struct mc_memory *mcmem, mcmem_in; struct mc_segment *mcseg, mcseg_in; struct mc_bank mcbank; struct mc_devgrp mcdevgrp; struct mc_ctrlconf *mcctrlconf, mcctrlconf_in; struct mc_control *mccontrol, mccontrol_in; struct seg_info *seg = NULL; struct bank_info *bank = NULL; struct dgrp_info *dgrp = NULL; struct mctrl_info *mcport; mc_dlist_t *mctrl; int i, status = 0; cpu_t *cpu; switch (cmd) { case MCIOC_MEMCONF: mutex_enter(&mcdatamutex); mcmconf.nmcs = nmcs; mcmconf.nsegments = nsegments; mcmconf.nbanks = maxbanks; mcmconf.ndevgrps = NDGRPS; mcmconf.ndevs = NDIMMS; mcmconf.len_dev = MAX_DEVLEN; mcmconf.xfer_size = TRANSFER_SIZE; mutex_exit(&mcdatamutex); if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf))) return (EFAULT); return (0); /* * input: nsegments and allocate space for various length of segmentids * * return 0: size, number of segments, and all segment ids, * where glocal and local ids are identical. * EINVAL: if the given nsegments is less than that in kernel and * nsegments of struct will be updated. * EFAULT: if other errors in kernel. 
*/ case MCIOC_MEM: if (copyin((void *)arg, &mcmem_in, sizeof (struct mc_memory)) != 0) return (EFAULT); mutex_enter(&mcdatamutex); if (mcmem_in.nsegments < nsegments) { mcmem_in.nsegments = nsegments; if (copyout(&mcmem_in, (void *)arg, sizeof (struct mc_memory))) status = EFAULT; else status = EINVAL; mutex_exit(&mcdatamutex); return (status); } size = sizeof (struct mc_memory) + (nsegments - 1) * sizeof (mcmem->segmentids[0]); mcmem = kmem_zalloc(size, KM_SLEEP); mcmem->size = memsize; mcmem->nsegments = nsegments; seg = (struct seg_info *)seg_head; for (i = 0; i < nsegments; i++) { ASSERT(seg != NULL); mcmem->segmentids[i].globalid = seg->seg_node.id; mcmem->segmentids[i].localid = seg->seg_node.id; seg = (struct seg_info *)seg->seg_node.next; } mutex_exit(&mcdatamutex); if (copyout(mcmem, (void *)arg, size)) status = EFAULT; kmem_free(mcmem, size); return (status); /* * input: id, nbanks and allocate space for various length of bankids * * return 0: base, size, number of banks, and all bank ids, * where global id is unique of all banks and local id * is only unique for mc. * EINVAL: either id isn't found or if given nbanks is less than * that in kernel and nbanks of struct will be updated. * EFAULT: if other errors in kernel. 
*/ case MCIOC_SEG: if (copyin((void *)arg, &mcseg_in, sizeof (struct mc_segment)) != 0) return (EFAULT); mutex_enter(&mcdatamutex); if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id, seg_head)) == NULL) { DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, " "id %d\n", mcseg_in.id)); mutex_exit(&mcdatamutex); return (EFAULT); } if (mcseg_in.nbanks < seg->nbanks) { mcseg_in.nbanks = seg->nbanks; if (copyout(&mcseg_in, (void *)arg, sizeof (struct mc_segment))) status = EFAULT; else status = EINVAL; mutex_exit(&mcdatamutex); return (status); } size = sizeof (struct mc_segment) + (seg->nbanks - 1) * sizeof (mcseg->bankids[0]); mcseg = kmem_zalloc(size, KM_SLEEP); mcseg->id = seg->seg_node.id; mcseg->ifactor = seg->ifactor; mcseg->base = seg->base; mcseg->size = seg->size; mcseg->nbanks = seg->nbanks; bank = seg->hb_inseg; DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n", seg->nbanks, (void *)seg, (void *)bank)); i = 0; while (bank != NULL) { DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n", i, bank->bank_node.id)); mcseg->bankids[i].globalid = bank->bank_node.id; mcseg->bankids[i++].localid = bank->local_id; bank = bank->n_inseg; } ASSERT(i == seg->nbanks); mutex_exit(&mcdatamutex); if (copyout(mcseg, (void *)arg, size)) status = EFAULT; kmem_free(mcseg, size); return (status); /* * input: id * * return 0: mask, match, size, and devgrpid, * where global id is unique of all devgrps and local id * is only unique for mc. * EINVAL: if id isn't found * EFAULT: if other errors in kernel. 
*/ case MCIOC_BANK: if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0) return (EFAULT); DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id)); mutex_enter(&mcdatamutex); if ((bank = (struct bank_info *)mc_node_get(mcbank.id, bank_head)) == NULL) { mutex_exit(&mcdatamutex); return (EINVAL); } DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n", bank->bank_node.id, (void *)bank, bank->valid)); /* * If (Physic Address & MASK) == MATCH, Physic Address is * located at this bank. The lower physical address bits * are at [9-6]. */ mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >> MADR_LK_SHIFT))) << MADR_LPA_SHIFT; mcbank.match = bank->lm << MADR_LPA_SHIFT; mcbank.size = bank->size; mcbank.devgrpid.globalid = bank->devgrp_id; mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS; mutex_exit(&mcdatamutex); if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank))) return (EFAULT); return (0); /* * input:id and allocate space for various length of deviceids * * return 0: size and number of devices. * EINVAL: id isn't found * EFAULT: if other errors in kernel. */ case MCIOC_DEVGRP: if (copyin((void *)arg, &mcdevgrp, sizeof (struct mc_devgrp)) != 0) return (EFAULT); mutex_enter(&mcdatamutex); if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id, dgrp_head)) == NULL) { DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id " "%d\n", mcdevgrp.id)); mutex_exit(&mcdatamutex); return (EINVAL); } mcdevgrp.ndevices = dgrp->ndevices; mcdevgrp.size = dgrp->size; mutex_exit(&mcdatamutex); if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp))) status = EFAULT; return (status); /* * input: nmcs and allocate space for various length of mcids * * return 0: number of mc, and all mcids, * where glocal and local ids are identical. * EINVAL: if the given nmcs is less than that in kernel and * nmcs of struct will be updated. * EFAULT: if other errors in kernel. 
*/ case MCIOC_CTRLCONF: if (copyin((void *)arg, &mcctrlconf_in, sizeof (struct mc_ctrlconf)) != 0) return (EFAULT); mutex_enter(&mcdatamutex); if (mcctrlconf_in.nmcs < nmcs) { mcctrlconf_in.nmcs = nmcs; if (copyout(&mcctrlconf_in, (void *)arg, sizeof (struct mc_ctrlconf))) status = EFAULT; else status = EINVAL; mutex_exit(&mcdatamutex); return (status); } /* * Cannot just use the size of the struct because of the various * length struct */ size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) * sizeof (mcctrlconf->mcids[0])); mcctrlconf = kmem_zalloc(size, KM_SLEEP); mcctrlconf->nmcs = nmcs; /* Get all MC ids and add to mcctrlconf */ mctrl = mctrl_head; i = 0; while (mctrl != NULL) { mcctrlconf->mcids[i].globalid = mctrl->id; mcctrlconf->mcids[i].localid = mctrl->id; i++; mctrl = mctrl->next; } ASSERT(i == nmcs); mutex_exit(&mcdatamutex); if (copyout(mcctrlconf, (void *)arg, size)) status = EFAULT; kmem_free(mcctrlconf, size); return (status); /* * input:id, ndevgrps and allocate space for various length of devgrpids * * return 0: number of devgrp, and all devgrpids, * is unique of all devgrps and local id is only unique * for mc. * EINVAL: either if id isn't found or if the given ndevgrps is * less than that in kernel and ndevgrps of struct will * be updated. * EFAULT: if other errors in kernel. */ case MCIOC_CONTROL: if (copyin((void *)arg, &mccontrol_in, sizeof (struct mc_control)) != 0) return (EFAULT); mutex_enter(&mcdatamutex); if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id, mctrl_head)) == NULL) { mutex_exit(&mcdatamutex); return (EINVAL); } /* * mcport->ndevgrps zero means Memory Controller is disable. 
*/ if ((mccontrol_in.ndevgrps < mcport->ndevgrps) || (mcport->ndevgrps == 0)) { mccontrol_in.ndevgrps = mcport->ndevgrps; if (copyout(&mccontrol_in, (void *)arg, sizeof (struct mc_control))) status = EFAULT; else if (mcport->ndevgrps != 0) status = EINVAL; mutex_exit(&mcdatamutex); return (status); } size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) * sizeof (mccontrol->devgrpids[0]); mccontrol = kmem_zalloc(size, KM_SLEEP); mccontrol->id = mcport->mctrl_node.id; mccontrol->ndevgrps = mcport->ndevgrps; for (i = 0; i < mcport->ndevgrps; i++) { mccontrol->devgrpids[i].globalid = mcport->devgrpids[i]; mccontrol->devgrpids[i].localid = mcport->devgrpids[i] % NDGRPS; DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n", *(uint64_t *)&mccontrol->devgrpids[i])); } mutex_exit(&mcdatamutex); if (copyout(mccontrol, (void *)arg, size)) status = EFAULT; kmem_free(mccontrol, size); return (status); /* * input:id * * return 0: CPU flushed successfully. * EINVAL: the id wasn't found */ case MCIOC_ECFLUSH: mutex_enter(&cpu_lock); cpu = cpu_get((processorid_t)arg); mutex_exit(&cpu_lock); if (cpu == NULL) return (EINVAL); xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0); return (0); default: DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n")); return (EFAULT); } } /* * Get Memory Address Decoding Registers and construct list. * flag is to workaround Cheetah's restriction where register cannot be mapped * if port id(MC registers on it) == cpu id(process is running on it). */ static int mc_get_mcregs(struct mc_soft_state *softsp) { int i; int err = 0; uint64_t madreg; uint64_t ma_reg_array[NBANKS]; /* there are NBANKS of madrs */ /* Construct lists for MC, mctrl_info, dgrp_info, and device_info */ mc_construct(softsp->portid, softsp->memlayoutp); /* * If memlayoutp is NULL, the Memory Controller is disable, and * doesn't need to create any bank and segment. 
*/ if (softsp->memlayoutp == NULL) goto exit; /* * Get the content of 4 Memory Address Decoding Registers, and * construct lists of logical banks and segments. */ for (i = 0; i < NBANKS; i++) { DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d " "cpu=%d\n", (void *)softsp->mc_base, softsp->portid, CPU->cpu_id)); kpreempt_disable(); if (softsp->portid == (cpunodes[CPU->cpu_id].portid)) madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET)); else madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET + (i * REGOFFSET))); kpreempt_enable(); DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg " "reg=0x%lx\n", softsp->memlayoutp, madreg)); ma_reg_array[i] = madreg; if ((err = mlayout_add(softsp->portid, i, madreg, softsp->memlayoutp)) == -1) break; } /* * Create the logical bank property for this mc node. This * property is an encoded array of the madr for each logical * bank (there are NBANKS of these). */ if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, MEM_CFG_PROP_NAME) != 1) { (void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip, DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME, (caddr_t)&ma_reg_array, sizeof (ma_reg_array)); } exit: if (!err) { mutex_enter(&mcdatamutex); nmcs++; mutex_exit(&mcdatamutex); } return (err); } /* * Translate a pair to a physical address. */ static int mc_offset_to_addr(struct seg_info *seg, struct bank_info *bank, uint64_t off, uint64_t *addr) { uint64_t base, size, line, remainder; uint32_t ifactor; /* * Compute the half-dimm size in bytes. * Note that bank->size represents the number of data bytes, * and does not include the additional bits used for ecc, mtag, * and mtag ecc information in each 144-bit checkword. * For calculating the offset to a checkword we need the size * including the additional 8 bytes for each 64 data bytes of * a cache line. */ size = ((bank->size / 4) / 64) * 72; /* * Check if the offset is within this bank. 
This depends on the position * of the bank, i.e., whether it is the front bank or the back bank. */ base = size * bank->pos; if ((off < base) || (off >= (base + size))) return (-1); /* * Compute the offset within the half-dimm. */ off -= base; /* * Compute the line within the half-dimm. This is the same as the line * within the bank since each DIMM in a bank contributes uniformly * 144 bits (18 bytes) to a cache line. */ line = off / QWORD_SIZE_BYTES; remainder = off % QWORD_SIZE_BYTES; /* * Compute the line within the segment. * The bank->lm field indicates the order in which cache lines are * distributed across the banks of a segment (See the Cheetah PRM). * The interleave factor the bank is programmed with is used instead * of the segment interleave factor since a segment can be composed * of banks with different interleave factors if the banks are not * uniform in size. */ ifactor = (bank->lk ^ 0xF) + 1; line = (line * ifactor) + bank->lm; /* * Compute the physical address assuming that there are 64 data bytes * in a cache line. */ *addr = (line << 6) + seg->base; *addr += remainder * 16; return (0); } /* * Translate a physical address to a pair. */ static void mc_addr_to_offset(struct seg_info *seg, struct bank_info *bank, uint64_t addr, uint64_t *off) { uint64_t base, size, line, remainder; uint32_t ifactor; /* * Compute the line within the segment assuming that there are 64 data * bytes in a cache line. */ line = (addr - seg->base) / 64; /* * The lm (lower match) field from the Memory Address Decoding Register * for this bank determines which lines within a memory segment this * bank should respond to. These are the actual address bits the * interleave is done over (See the Cheetah PRM). * In other words, the lm field indicates the order in which the cache * lines are distributed across the banks of a segment, and thusly it * can be used to compute the line within this bank. This is the same as * the line within the half-dimm. 
This is because each DIMM in a bank * contributes uniformly to every cache line. */ ifactor = (bank->lk ^ 0xF) + 1; line = (line - bank->lm)/ifactor; /* * Compute the offset within the half-dimm. This depends on whether * or not the bank is a front logical bank or a back logical bank. */ *off = line * QWORD_SIZE_BYTES; /* * Compute the half-dimm size in bytes. * Note that bank->size represents the number of data bytes, * and does not include the additional bits used for ecc, mtag, * and mtag ecc information in each 144-bit quadword. * For calculating the offset to a checkword we need the size * including the additional 8 bytes for each 64 data bytes of * a cache line. */ size = ((bank->size / 4) / 64) * 72; /* * Compute the offset within the dimm to the nearest line. This depends * on whether or not the bank is a front logical bank or a back logical * bank. */ base = size * bank->pos; *off += base; remainder = (addr - seg->base) % 64; remainder /= 16; *off += remainder; } /* * A cache line is composed of four quadwords with the associated ECC, the * MTag along with its associated ECC. This is depicted below: * * | Data | ECC | Mtag |MTag ECC| * 127 0 8 0 2 0 3 0 * * synd_code will be mapped as the following order to mc_get_mem_unum. * 143 16 7 4 0 * * | Quadword 0 | Quadword 1 | Quadword 2 | Quadword 3 | * 575 432 431 288 287 144 143 0 * * dimm table: each bit at a cache line needs two bits to present one of * four dimms. So it needs 144 bytes(576 * 2 / 8). The content is in * big edian order, i.e. dimm_table[0] presents for bit 572 to 575. * * pin table: each bit at a cache line needs one byte to present pin position, * where max. is 230. So it needs 576 bytes. The order of table index is * the same as bit position at a cache line, i.e. pin_table[0] presents * for bit 0, Mtag ECC 0 of Quadword 3. * * This is a mapping from syndrome code to QuadWord Logical layout at Safari. * Referring to Figure 3-4, Excalibur Architecture Manual. 
* This table could be moved to cheetah.c if other platform teams agree with * the bit layout at QuadWord. */ static uint8_t qwordmap[] = { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 0, 1, 2, 3, }; /* ARGSUSED */ static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp) { int i, upper_pa, lower_pa, dimmoffset; int quadword, pos_cacheline, position, index, idx4dimm; int qwlayout = synd_code; short offset, data; char unum[UNUM_NAMLEN]; struct dimm_info *dimmp; struct pin_info *pinp; struct bank_info *bank; /* * Enforce old Openboot requirement for synd code, either a single-bit * code from 0..QWORD_SIZE-1 or -1 (multi-bit error). */ if (qwlayout < -1 || qwlayout >= QWORD_SIZE) return (EINVAL); unum[0] = '\0'; upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT; lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT; DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout)); /* * Scan all logical banks to get one responding to the physical * address. Then compute the index to look up dimm and pin tables * to generate the unum. 
*/ mutex_enter(&mcdatamutex); bank = (struct bank_info *)bank_head; while (bank != NULL) { int bankid, mcid, bankno_permc; bankid = bank->bank_node.id; bankno_permc = bankid % NBANKS; mcid = bankid / NBANKS; /* * The Address Decoding logic decodes the different fields * in the Memory Address Decoding register to determine * whether a particular logical bank should respond to a * physical address. */ if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) | bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) { bank = (struct bank_info *)bank->bank_node.next; continue; } dimmoffset = (bankno_permc % NDGRPS) * NDIMMS; dimmp = (struct dimm_info *)bank->dimminfop; ASSERT(dimmp != NULL); if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) { /* * single-bit error handling, we can identify specific * DIMM. */ pinp = (struct pin_info *)&dimmp->data[0]; if (!dimmp->sym_flag) pinp++; quadword = (paddr & 0x3f) / 16; /* or quadword = (paddr >> 4) % 4; */ pos_cacheline = ((3 - quadword) * QWORD_SIZE) + qwordmap[qwlayout]; position = 575 - pos_cacheline; index = position * 2 / 8; offset = position % 4; /* * Trade-off: We couldn't add pin number to * unum string because statistic number * pumps up at the corresponding dimm not pin. * (void) sprintf(unum, "Pin %1u ", (uint_t) * pinp->pintable[pos_cacheline]); */ DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n", (uint_t)pinp->pintable[pos_cacheline])); data = pinp->dimmtable[index]; idx4dimm = (data >> ((3 - offset) * 2)) & 3; (void) strncpy(unum, (char *)dimmp->label[dimmoffset + idx4dimm], UNUM_NAMLEN); DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum)); /* * platform hook for adding label information to unum. */ mc_add_mem_unum_label(unum, mcid, bankno_permc, idx4dimm); } else { char *p = unum; size_t res = UNUM_NAMLEN; /* * multi-bit error handling, we can only identify * bank of DIMMs. */ for (i = 0; (i < NDIMMS) && (res > 0); i++) { (void) snprintf(p, res, "%s%s", i == 0 ? 
"" : " ", (char *)dimmp->label[dimmoffset + i]); res -= strlen(p); p += strlen(p); } /* * platform hook for adding label information * to unum. */ mc_add_mem_unum_label(unum, mcid, bankno_permc, -1); } mutex_exit(&mcdatamutex); if ((strlen(unum) >= UNUM_NAMLEN) || (strlen(unum) >= buflen)) { return (ENAMETOOLONG); } else { (void) strncpy(buf, unum, buflen); *lenp = strlen(buf); return (0); } } /* end of while loop for logical bank list */ mutex_exit(&mcdatamutex); return (ENXIO); } /* ARGSUSED */ static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp) { int upper_pa, lower_pa; struct bank_info *bank; struct seg_info *seg; upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT; lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT; /* * Scan all logical banks to get one responding to the physical * address. */ mutex_enter(&mcdatamutex); bank = (struct bank_info *)bank_head; while (bank != NULL) { /* * The Address Decoding logic decodes the different fields * in the Memory Address Decoding register to determine * whether a particular logical bank should respond to a * physical address. */ if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) | bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) { bank = (struct bank_info *)bank->bank_node.next; continue; } seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head); ASSERT(seg != NULL); ASSERT(paddr >= seg->base); mc_addr_to_offset(seg, bank, paddr, offp); mutex_exit(&mcdatamutex); return (0); } mutex_exit(&mcdatamutex); return (ENXIO); } /* * Translate a DIMM pair to a physical address. */ static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr) { struct seg_info *seg; struct bank_info *bank; int first_seg_id; int i, found; ASSERT(sid != NULL); mutex_enter(&mcdatamutex); rw_enter(&mcdimmsids_rw, RW_READER); /* * If DIMM serial ids have not been cached yet, tell the * caller to try again. 
*/ if (mc_dimm_sids == NULL) { rw_exit(&mcdimmsids_rw); return (EAGAIN); } for (i = 0; i < max_entries; i++) { if (mc_dimm_sids[i].mcid == mcid) break; } if (i == max_entries) { rw_exit(&mcdimmsids_rw); mutex_exit(&mcdatamutex); return (ENODEV); } first_seg_id = mc_dimm_sids[i].seg_id; seg = (struct seg_info *)mc_node_get(first_seg_id, seg_head); rw_exit(&mcdimmsids_rw); if (seg == NULL) { mutex_exit(&mcdatamutex); return (ENODEV); } found = 0; for (bank = seg->hb_inseg; bank; bank = bank->n_inseg) { ASSERT(bank->valid); for (i = 0; i < NDIMMS; i++) { if (strncmp((char *)bank->dimmsidp[i], sid, DIMM_SERIAL_ID_LEN) == 0) break; } if (i == NDIMMS) continue; if (mc_offset_to_addr(seg, bank, off, paddr) == -1) continue; found = 1; break; } if (found) { mutex_exit(&mcdatamutex); return (0); } /* * If a bank wasn't found, it may be in another segment. * This can happen if the different logical banks of an MC * have different interleave factors. To deal with this * possibility, we'll do a brute-force search for banks * for this MC with a different seg id then above. */ bank = (struct bank_info *)bank_head; while (bank != NULL) { if (!bank->valid) { bank = (struct bank_info *)bank->bank_node.next; continue; } if (bank->bank_node.id / NBANKS != mcid) { bank = (struct bank_info *)bank->bank_node.next; continue; } /* Ignore banks in the segment we looked in above. 
*/ if (bank->seg_id == mc_dimm_sids[i].seg_id) { bank = (struct bank_info *)bank->bank_node.next; continue; } for (i = 0; i < NDIMMS; i++) { if (strncmp((char *)bank->dimmsidp[i], sid, DIMM_SERIAL_ID_LEN) == 0) break; } if (i == NDIMMS) { bank = (struct bank_info *)bank->bank_node.next; continue; } seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head); if (mc_offset_to_addr(seg, bank, off, paddr) == -1) { bank = (struct bank_info *)bank->bank_node.next; continue; } found = 1; break; } mutex_exit(&mcdatamutex); if (found) return (0); else return (ENOENT); } static int mc_get_mem_info(int synd_code, uint64_t paddr, uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp, int *mcidp) { int upper_pa, lower_pa; struct bank_info *bankp; if (synd_code < -1 || synd_code >= QWORD_SIZE) return (EINVAL); upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT; lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT; /* * Scan all logical banks to get one responding to the physical * address. */ mutex_enter(&mcdatamutex); bankp = (struct bank_info *)bank_head; while (bankp != NULL) { struct seg_info *segp; int bankid, mcid; bankid = bankp->bank_node.id; mcid = bankid / NBANKS; /* * The Address Decoding logic decodes the different fields * in the Memory Address Decoding register to determine * whether a particular logical bank should respond to a * physical address. */ if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) | bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) { bankp = (struct bank_info *)bankp->bank_node.next; continue; } /* * Get the corresponding segment. 
*/ if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id, seg_head)) == NULL) { mutex_exit(&mcdatamutex); return (EFAULT); } *mem_sizep = memsize; *seg_sizep = segp->size; *bank_sizep = bankp->size; *segsp = nsegments; *banksp = segp->nbanks; *mcidp = mcid; mutex_exit(&mcdatamutex); return (0); } /* end of while loop for logical bank list */ mutex_exit(&mcdatamutex); return (ENXIO); } /* * Construct lists for an enabled MC where size of memory is 0. * The lists are connected as follows: * Attached MC -> device group list -> device list(per devgrp). */ static void mc_construct(int mc_id, void *dimminfop) { int i, j, idx, dmidx; struct mctrl_info *mctrl; struct dgrp_info *dgrp; struct device_info *dev; struct dimm_info *dimmp = (struct dimm_info *)dimminfop; mutex_enter(&mcdatamutex); /* allocate for mctrl_info and bank_info */ if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) != NULL) { cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id); mutex_exit(&mcdatamutex); return; } mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP); /* * If dimminfop is NULL, the Memory Controller is disable, and * the number of device group will be zero. 
*/ if (dimminfop == NULL) { mctrl->mctrl_node.id = mc_id; mctrl->ndevgrps = 0; mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail); mutex_exit(&mcdatamutex); return; } /* add the entry on dgrp_info list */ for (i = 0; i < NDGRPS; i++) { idx = mc_id * NDGRPS + i; mctrl->devgrpids[i] = idx; if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) != NULL) { cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n", idx); continue; } dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP); /* add the entry on device_info list */ for (j = 0; j < NDIMMS; j++) { dmidx = idx * NDIMMS + j; dgrp->deviceids[j] = dmidx; if ((dev = (struct device_info *) mc_node_get(dmidx, device_head)) != NULL) { cmn_err(CE_WARN, "mc_construct: device %d " "exists\n", dmidx); continue; } dev = kmem_zalloc(sizeof (struct device_info), KM_SLEEP); dev->dev_node.id = dmidx; dev->size = 0; (void) strncpy(dev->label, (char *) dimmp->label[i * NDIMMS + j], MAX_DEVLEN); mc_node_add((mc_dlist_t *)dev, &device_head, &device_tail); } /* for loop for constructing device_info */ dgrp->dgrp_node.id = idx; dgrp->ndevices = NDIMMS; dgrp->size = 0; mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail); } /* end of for loop for constructing dgrp_info list */ mctrl->mctrl_node.id = mc_id; mctrl->ndevgrps = NDGRPS; mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail); mutex_exit(&mcdatamutex); } /* * Construct lists for Memory Configuration at logical viewpoint. * * Retrieve information from Memory Address Decoding Register and set up * bank and segment lists. Link bank to its corresponding device group, and * update size of device group and devices. Also connect bank to the segment. 
* * Memory Address Decoding Register * ------------------------------------------------------------------------- * |63|62 53|52 41|40 37|36 20|19 18|17 14|13 12|11 8|7 0| * |-----------|----------|------|---------|-----|------|-----|-----|-------| * |V | - | UK | - | UM | - | LK | - | LM | - | * ------------------------------------------------------------------------- * */ static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop) { int i, dmidx, idx; uint32_t ifactor; int status = 0; uint64_t size, base; struct seg_info *seg_curr; struct bank_info *bank_curr; struct dgrp_info *dgrp; struct device_info *dev; union { struct { uint64_t valid : 1; uint64_t resrv1 : 10; uint64_t uk : 12; uint64_t resrv2 : 4; uint64_t um : 17; uint64_t resrv3 : 2; uint64_t lk : 4; uint64_t resrv4 : 2; uint64_t lm : 4; uint64_t resrv5 : 8; } _s; uint64_t madreg; } mcreg; mcreg.madreg = reg; DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num " "%d, reg 0x%lx\n", mc_id, bank_no, reg)); /* add the entry on bank_info list */ idx = mc_id * NBANKS + bank_no; mutex_enter(&mcdatamutex); if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head)) != NULL) { cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no); goto exit; } bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP); bank_curr->bank_node.id = idx; bank_curr->valid = mcreg._s.valid; bank_curr->dimminfop = dimminfop; if (!mcreg._s.valid) { mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail); goto exit; } /* * size of a logical bank = size of segment / interleave factor * This fomula is not only working for regular configuration, * i.e. number of banks at a segment equals to the max * interleave factor, but also for special case, say 3 bank * interleave. One bank is 2 way interleave and other two are * 4 way. So the sizes of banks are size of segment/2 and /4 * respectively. 
*/ ifactor = (mcreg._s.lk ^ 0xF) + 1; size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor; base = mcreg._s.um & ~mcreg._s.uk; base <<= MADR_UPA_SHIFT; bank_curr->uk = mcreg._s.uk; bank_curr->um = mcreg._s.um; bank_curr->lk = mcreg._s.lk; bank_curr->lm = mcreg._s.lm; bank_curr->size = size; /* * The bank's position depends on which halves of the DIMMs it consists * of. The front-side halves of the 4 DIMMs constitute the front bank * and the back-side halves constitute the back bank. Bank numbers * 0 and 1 are front-side banks and bank numbers 2 and 3 are back side * banks. */ bank_curr->pos = bank_no >> 1; ASSERT((bank_curr->pos == 0) || (bank_curr->pos == 1)); /* * Workaround to keep gcc and SS12 lint happy. * Lint expects lk, uk and um in the format statement below * to use %lx, but this produces a warning when compiled with * gcc. */ #if defined(lint) DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, " "lk 0x%lx uk 0x%lx um 0x%lx ifactor 0x%x size 0x%lx base 0x%lx\n", idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base)); #else /* lint */ DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, " "lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n", idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base)); #endif /* lint */ /* connect the entry and update the size on dgrp_info list */ idx = mc_id * NDGRPS + (bank_no % NDGRPS); if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) { /* all avaiable dgrp should be linked at mc_construct */ cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx); kmem_free(bank_curr, sizeof (struct bank_info)); status = -1; goto exit; } bank_curr->devgrp_id = idx; dgrp->size += size; /* Update the size of entry on device_info list */ for (i = 0; i < NDIMMS; i++) { dmidx = dgrp->dgrp_node.id * NDIMMS + i; dgrp->deviceids[i] = dmidx; /* avaiable device should be linked at mc_construct */ if ((dev = (struct device_info *)mc_node_get(dmidx, device_head)) == 
NULL) { cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n", dmidx); kmem_free(bank_curr, sizeof (struct bank_info)); status = -1; goto exit; } dev->size += (size / NDIMMS); DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n", dmidx, size)); } /* * Get the segment by matching the base address, link this bank * to the segment. If not matched, allocate a new segment and * add it at segment list. */ if (seg_curr = seg_match_base(base)) { seg_curr->nbanks++; seg_curr->size += size; if (ifactor > seg_curr->ifactor) seg_curr->ifactor = ifactor; bank_curr->seg_id = seg_curr->seg_node.id; } else { seg_curr = (struct seg_info *) kmem_zalloc(sizeof (struct seg_info), KM_SLEEP); bank_curr->seg_id = seg_id; seg_curr->seg_node.id = seg_id++; seg_curr->base = base; seg_curr->size = size; seg_curr->nbanks = 1; seg_curr->ifactor = ifactor; mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail); nsegments++; } /* Get the local id of bank which is only unique per segment. */ bank_curr->local_id = seg_curr->nbanks - 1; /* add bank at the end of the list; not sorted by bankid */ if (seg_curr->hb_inseg != NULL) { bank_curr->p_inseg = seg_curr->tb_inseg; bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg; seg_curr->tb_inseg->n_inseg = bank_curr; seg_curr->tb_inseg = bank_curr; } else { bank_curr->n_inseg = bank_curr->p_inseg = NULL; seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr; } DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n", seg_curr->seg_node.id)); if (mc_dimm_sids) { rw_enter(&mcdimmsids_rw, RW_WRITER); mc_update_bank(bank_curr); rw_exit(&mcdimmsids_rw); } mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail); memsize += size; if (seg_curr->nbanks > maxbanks) maxbanks = seg_curr->nbanks; exit: mutex_exit(&mcdatamutex); return (status); } /* * Delete nodes related to the given MC on mc, device group, device, * and bank lists. Moreover, delete corresponding segment if its connected * banks are all removed. 
*
 * The "delete" argument is 1 if this is called as a result of DDI_DETACH. In
 * this case, the DIMM data structures need to be deleted. The argument is
 * 0 if this called as a result of DDI_SUSPEND/DDI_RESUME. In this case,
 * the DIMM data structures are left alone.
 *
 * The whole operation runs with mcdatamutex held.
 */
static void
mlayout_del(int mc_id, int delete)
{
	int i, j, dgrpid, devid, bankid, ndevgrps;
	struct seg_info *seg;
	struct bank_info *bank_curr;
	struct mctrl_info *mctrl;
	mc_dlist_t *dgrp_ptr;
	mc_dlist_t *dev_ptr;
	uint64_t base;

	mutex_enter(&mcdatamutex);

	/* delete mctrl_info */
	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
	    NULL) {
		ndevgrps = mctrl->ndevgrps;
		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
		kmem_free(mctrl, sizeof (struct mctrl_info));
		nmcs--;

		/*
		 * There is no other list left for disabled MC.
		 */
		if (ndevgrps == 0) {
			mutex_exit(&mcdatamutex);
			return;
		}
	} else
		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");

	/* Delete device groups and devices of the detached MC */
	for (i = 0; i < NDGRPS; i++) {
		dgrpid = mc_id * NDGRPS + i;
		if ((dgrp_ptr = mc_node_get(dgrpid, dgrp_head)) == NULL) {
			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
			continue;
		}

		for (j = 0; j < NDIMMS; j++) {
			devid = dgrpid * NDIMMS + j;
			if ((dev_ptr = mc_node_get(devid, device_head)) !=
			    NULL) {
				mc_node_del(dev_ptr, &device_head,
				    &device_tail);
				kmem_free(dev_ptr, sizeof (struct device_info));
			} else {
				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
				    devid);
			}
		}

		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
	}

	/* Delete banks and segments if it has no bank */
	for (i = 0; i < NBANKS; i++) {
		bankid = mc_id * NBANKS + i;
		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
		if ((bank_curr = (struct bank_info *)mc_node_get(bankid,
		    bank_head)) == NULL) {
			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
			continue;
		}

		if (bank_curr->valid) {
			/* Same base computation as mlayout_add(). */
			base = bank_curr->um & ~bank_curr->uk;
			base <<= MADR_UPA_SHIFT;
			bank_curr->valid = 0;
			memsize -= bank_curr->size;

			/* Delete bank at segment and segment if no bank left */
			if ((seg = seg_match_base(base)) == NULL) {
				cmn_err(CE_WARN, "mlayout_del: no seg\n");
				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
				    &bank_tail);
				kmem_free(bank_curr, sizeof (struct bank_info));
				continue;
			}

			/* update the bank list at the segment */
			if (bank_curr->n_inseg == NULL) {
				/* node is at the tail of list */
				seg->tb_inseg = bank_curr->p_inseg;
			} else {
				bank_curr->n_inseg->p_inseg =
				    bank_curr->p_inseg;
			}

			if (bank_curr->p_inseg == NULL) {
				/* node is at the head of list */
				seg->hb_inseg = bank_curr->n_inseg;
			} else {
				bank_curr->p_inseg->n_inseg =
				    bank_curr->n_inseg;
			}

			seg->nbanks--;
			seg->size -= bank_curr->size;

			if (seg->nbanks == 0) {
				mc_node_del((mc_dlist_t *)seg, &seg_head,
				    &seg_tail);
				kmem_free(seg, sizeof (struct seg_info));
				nsegments--;
			}

		}
		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
		kmem_free(bank_curr, sizeof (struct bank_info));
	}	/* end of for loop for four banks */

	/* On detach, invalidate this MC's cache entry and free its ids. */
	if (mc_dimm_sids && delete) {
		rw_enter(&mcdimmsids_rw, RW_WRITER);
		i = mc_get_sid_cache_index(mc_id);
		if (i >= 0) {
			mc_dimm_sids[i].state = MC_DIMM_SIDS_INVALID;
			if (mc_dimm_sids[i].sids) {
				kmem_free(mc_dimm_sids[i].sids,
				    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS));
				mc_dimm_sids[i].sids = NULL;
			}
		}
		rw_exit(&mcdimmsids_rw);
	}

	mutex_exit(&mcdatamutex);
}

/*
 * Search the segment in the list starting at seg_head by base address
 * input: base address
 * return: pointer of found segment or null if not found.
 *
 * The cursor is an ordinary automatic variable; nothing about the search
 * needs to persist between calls.
 */
static struct seg_info *
seg_match_base(u_longlong_t base)
{
	struct seg_info *seg_ptr;

	for (seg_ptr = (struct seg_info *)seg_head; seg_ptr != NULL;
	    seg_ptr = (struct seg_info *)seg_ptr->seg_node.next) {
		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
		    seg_ptr->base, base));
		if (seg_ptr->base == base)
			break;
	}
	return (seg_ptr);
}

/*
 * mc_dlist is a double linking list, including unique id, and pointers to
 * next, and previous nodes.
seg_info, bank_info, dgrp_info, device_info, * and mctrl_info has it at the top to share the operations, add, del, and get. * * The new node is added at the tail and is not sorted. * * Input: The pointer of node to be added, head and tail of the list */ static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail) { DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n", node->id, (void *)*head, (void *)*tail)); if (*head != NULL) { node->prev = *tail; node->next = (*tail)->next; (*tail)->next = node; *tail = node; } else { node->next = node->prev = NULL; *head = *tail = node; } } /* * Input: The pointer of node to be deleted, head and tail of the list * * Deleted node will be at the following positions * 1. At the tail of the list * 2. At the head of the list * 3. At the head and tail of the list, i.e. only one left. * 4. At the middle of the list */ static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail) { if (node->next == NULL) { /* deleted node is at the tail of list */ *tail = node->prev; } else { node->next->prev = node->prev; } if (node->prev == NULL) { /* deleted node is at the head of list */ *head = node->next; } else { node->prev->next = node->next; } } /* * Search the list from the head of the list to match the given id * Input: id and the head of the list * Return: pointer of found node */ static mc_dlist_t * mc_node_get(int id, mc_dlist_t *head) { mc_dlist_t *node; node = head; while (node != NULL) { DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n", node->id, id)); if (node->id == id) break; node = node->next; } return (node); } /* * mc-us3 driver allows a platform to add extra label * information to the unum string. If a platform implements a * kernel function called plat_add_mem_unum_label() it will be * executed. This would typically be implemented in the platmod. 
*/ static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm) { if (&plat_add_mem_unum_label) plat_add_mem_unum_label(buf, mcid, bank, dimm); } static int mc_get_sid_cache_index(int mcid) { int i; for (i = 0; i < max_entries; i++) { if (mcid == mc_dimm_sids[i].mcid) return (i); } return (-1); } static void mc_update_bank(struct bank_info *bank) { int i, j; int bankid, mcid, dgrp_no; /* * Mark the MC if DIMM sids are not available. * Mark which segment the DIMMs belong to. Allocate * space to store DIMM serial ids which are later * provided by the platform layer, and update the bank_info * structure with pointers to its serial ids. */ bankid = bank->bank_node.id; mcid = bankid / NBANKS; i = mc_get_sid_cache_index(mcid); if (mc_dimm_sids[i].state == MC_DIMM_SIDS_INVALID) mc_dimm_sids[i].state = MC_DIMM_SIDS_REQUESTED; mc_dimm_sids[i].seg_id = bank->seg_id; if (mc_dimm_sids[i].sids == NULL) { mc_dimm_sids[i].sids = (dimm_sid_t *)kmem_zalloc( sizeof (dimm_sid_t) * (NDGRPS * NDIMMS), KM_SLEEP); } dgrp_no = bank->devgrp_id % NDGRPS; for (j = 0; j < NDIMMS; j++) { bank->dimmsidp[j] = &mc_dimm_sids[i].sids[j + (NDIMMS * dgrp_no)]; } } static int mc_populate_sid_cache(void) { struct bank_info *bank; if (&plat_populate_sid_cache == 0) return (ENOTSUP); ASSERT(RW_WRITE_HELD(&mcdimmsids_rw)); bank = (struct bank_info *)bank_head; while (bank != NULL) { if (!bank->valid) { bank = (struct bank_info *)bank->bank_node.next; continue; } mc_update_bank(bank); bank = (struct bank_info *)bank->bank_node.next; } /* * Call to the platform layer to populate the cache * with DIMM serial ids. 
*/ return (plat_populate_sid_cache(mc_dimm_sids, max_entries)); } static void mc_init_sid_cache_thr(void) { ASSERT(mc_dimm_sids == NULL); mutex_enter(&mcdatamutex); rw_enter(&mcdimmsids_rw, RW_WRITER); mc_dimm_sids = plat_alloc_sid_cache(&max_entries); (void) mc_populate_sid_cache(); rw_exit(&mcdimmsids_rw); mutex_exit(&mcdatamutex); } static int mc_init_sid_cache(void) { if (&plat_alloc_sid_cache) { (void) thread_create(NULL, 0, mc_init_sid_cache_thr, NULL, 0, &p0, TS_RUN, minclsyspri); return (0); } else return (ENOTSUP); } static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp) { int i; if (buflen < DIMM_SERIAL_ID_LEN) return (ENOSPC); /* * If DIMM serial ids have not been cached yet, tell the * caller to try again. */ if (!rw_tryenter(&mcdimmsids_rw, RW_READER)) return (EAGAIN); if (mc_dimm_sids == NULL) { rw_exit(&mcdimmsids_rw); return (EAGAIN); } /* * Find dimm serial id using mcid and dimm # */ for (i = 0; i < max_entries; i++) { if (mc_dimm_sids[i].mcid == mcid) break; } if ((i == max_entries) || (!mc_dimm_sids[i].sids)) { rw_exit(&mcdimmsids_rw); return (ENOENT); } (void) strlcpy(buf, mc_dimm_sids[i].sids[dimm], DIMM_SERIAL_ID_LEN); *lenp = strlen(buf); rw_exit(&mcdimmsids_rw); return (0); }