/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/obpdefs.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/open.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
#include <sys/ivintr.h>
#include <sys/intr.h>
#include <sys/intreg.h>
#include <sys/autoconf.h>
#include <sys/modctl.h>
#include <sys/spl.h>
#include <sys/async.h>
#include <sys/mc.h>
#include <sys/mc-us3.h>
#include <sys/cpu_module.h>

/*
 * Function prototypes
 */

static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Configuration data structures
 */
/*
 * Character/block entry points for the driver.  Only open, close and ioctl
 * are implemented by this file; the remaining slots are filled with the
 * generic nulldev/nodev/nochpoll/ddi_prop_op handlers.  This table is
 * referenced from mc_ops below.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};

/*
 * Device operations for the driver.  attach and detach are implemented by
 * mc_attach()/mc_detach() in this file; the character entry points are
 * supplied through mc_cb_ops.  This table is handed to the module framework
 * via modldrv.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};

/*
 * Driver globals
 */
/* soft state handle, initialised by ddi_soft_state_init() in _init() */
static void *mcp;
/* count of memory controllers; incremented in mc_get_mcregs() */
static int nmcs = 0;
/* NOTE(review): seg_id is not referenced in the visible part of the file */
static int seg_id = 0;
/* the following three values are reported to user land by mc_ioctl() */
static int nsegments = 0;
static uint64_t memsize = 0;
static int maxbanks = 0;

/*
 * Head and tail pointers of the lists of segments, banks, memory
 * controllers, device groups and devices.  The visible code walks and
 * modifies these lists while holding mcdatamutex.
 */
static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
static mc_dlist_t *device_head, *device_tail;

/* mcmutex is held around accesses to mc_is_open in mc_open()/mc_close() */
static kmutex_t	mcmutex;
/* mcdatamutex is held around accesses to the lists and counters above */
static kmutex_t	mcdatamutex;
/* non-zero while the device is open; enforces a single open at a time */
static int mc_is_open = 0;

extern struct mod_ops mod_driverops;

/*
 * Module linkage: modldrv describes this driver (its ops vector is mc_ops),
 * and modlinkage is the structure passed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info().
 */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"Memory-controller: %I%",	/* module name */
	&mc_ops,			/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};

/*
 * Forward declarations of the driver's internal routines.
 */
static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
    int buflen, int *lenp);
static int mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);
static int mc_get_mcregs(struct mc_soft_state *);
static void mc_construct(int mc_id, void *dimminfop);
static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
static void mlayout_del(int mc_id);
static struct seg_info *seg_match_base(u_longlong_t base);
static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);

/*
 * These symbols are declared weak.  mc_attach() and mc_detach() test
 * &p2get_mem_unum and &p2get_mem_info before storing to them, so the driver
 * tolerates their absence.
 * NOTE(review): plat_add_mem_unum_label is presumably a platform hook used
 * by mc_add_mem_unum_label(); its use is not visible here -- confirm.
 */
#pragma weak p2get_mem_unum
#pragma weak p2get_mem_info
#pragma weak plat_add_mem_unum_label

/*
 * These are the module initialization routines.
 */

int
_init(void)
{
	int error;

	if ((error = ddi_soft_state_init(&mcp,
	    sizeof (struct mc_soft_state), 1)) != 0)
		return (error);

	error =  mod_install(&modlinkage);
	if (error == 0) {
		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
	}

	return (error);
}

/*
 * Module unload entry point.
 *
 * Removes the module; only when that succeeds are the soft state handle and
 * the driver mutexes released.  Returns 0 on success or the error from
 * mod_remove().
 */
int
_fini(void)
{
	int rv = mod_remove(&modlinkage);

	if (rv == 0) {
		ddi_soft_state_fini(&mcp);
		mutex_destroy(&mcmutex);
		mutex_destroy(&mcdatamutex);
	}

	return (rv);
}

/*
 * Module information entry point: hands the request to mod_info() together
 * with this module's linkage structure and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&modlinkage, modinfop);
	return (rv);
}

/*
 * Attach entry point.
 *
 * DDI_ATTACH: allocates the soft state for the instance, reads the "portid"
 * property, maps the registers, fetches the optional "memory-layout"
 * property, builds the driver's data structures through mc_get_mcregs() and
 * creates the "mc-us3" minor node.  Everything acquired so far is released
 * on failure.
 *
 * DDI_RESUME: removes the MEM_CFG_PROP_NAME property and the per-MC data
 * structures and rebuilds them from the current register contents.  A
 * failure to rebuild is only logged.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	struct mc_soft_state *softsp;
	struct dimm_info *dimminfop = NULL;
	int instance, err;
	int len = 0;

	/* get the instance of this devi */
	instance = ddi_get_instance(devi);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		/* get the soft state pointer for this device node */
		softsp = ddi_get_soft_state(mcp, instance);
		if (softsp == NULL)
			return (DDI_FAILURE);

		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
		    instance));
		/*
		 * During resume, the source and target board's bank_infos
		 * need to be updated with the new mc MADR values.  This is
		 * implemented with existing functionality by first removing
		 * the props and allocated data structs, and then adding them
		 * back in.
		 */
		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
		    MEM_CFG_PROP_NAME) == 1) {
			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
			    MEM_CFG_PROP_NAME);
		}
		mlayout_del(softsp->portid);
		if (mc_get_mcregs(softsp) == -1) {
			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
			    instance);
		}
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	softsp = ddi_get_soft_state(mcp, instance);

	/* Set the dip in the soft state */
	softsp->dip = devi;

	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
		    instance, "portid"));
		goto bad;
	}

	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
	    instance, softsp->portid, CPU->cpu_id));

	/* map in the registers for this device. */
	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
		    instance));
		goto bad;
	}

	/*
	 * Get the label of dimms and pin routing information at memory-layout
	 * property if the memory controller is enabled.
	 *
	 * Basically every memory-controller node on every machine should
	 * have one of these properties unless the memory controller is
	 * physically not capable of having memory attached to it, e.g.
	 * Excalibur's slave processor.
	 */
	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
	    "memory-layout", (caddr_t)&dimminfop, &len);
	if (err == DDI_PROP_SUCCESS) {
		/*
		 * Set the pointer and size of property in the soft state
		 */
		softsp->memlayoutp = dimminfop;
		softsp->size = len;
	} else if (err == DDI_PROP_NOT_FOUND) {
		/*
		 * This is a disabled MC. Clear out the pointer and size
		 * of property in the soft state, and in the locals that
		 * are printed below.
		 */
		softsp->memlayoutp = NULL;
		softsp->size = 0;
		dimminfop = NULL;
		len = 0;
	} else {
		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
		    instance, dimminfop));
		goto bad2;
	}

	/* dimminfop is NULL for a disabled MC; do not dereference it then */
	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
	    instance, dimminfop,
	    (dimminfop == NULL) ? (uint64_t)0 : *(uint64_t *)dimminfop, len));

	/* Get MC registers and construct all needed data structure */
	if (mc_get_mcregs(softsp) == -1)
		goto bad1;

	/* the first MC to attach installs the unum/info lookup hooks */
	mutex_enter(&mcmutex);
	if (nmcs == 1) {
		if (&p2get_mem_unum)
			p2get_mem_unum = mc_get_mem_unum;
		if (&p2get_mem_info)
			p2get_mem_info = mc_get_mem_info;
	}
	mutex_exit(&mcmutex);

	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
		    " failed \n"));
		goto bad1;
	}

	ddi_report_dev(devi);
	return (DDI_SUCCESS);

bad1:
	/* release all allocated data struture for this MC */
	mlayout_del(softsp->portid);
	if (softsp->memlayoutp != NULL)
		kmem_free(softsp->memlayoutp, softsp->size);

	/* remove the libdevinfo property */
	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
	    MEM_CFG_PROP_NAME) == 1) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
		    MEM_CFG_PROP_NAME);
	}

bad2:
	/* unmap the registers for this device. */
	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);

bad:
	ddi_soft_state_free(mcp, instance);
	return (DDI_FAILURE);
}

/*
 * Detach entry point.
 *
 * DDI_SUSPEND needs no work and succeeds immediately.  DDI_DETACH removes
 * the MEM_CFG_PROP_NAME property, releases the per-MC data structures and
 * the saved memory-layout data, unmaps the registers, clears the
 * p2get_mem_unum/p2get_mem_info hooks once no MC is left, removes the minor
 * node and frees the soft state.  Any other command fails.
 */
/* ARGSUSED */
static int
mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	struct mc_soft_state *softsp;
	int instance;
	int have_prop;

	instance = ddi_get_instance(devi);
	softsp = ddi_get_soft_state(mcp, instance);

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
	    instance, softsp->portid, softsp->memlayoutp));

	/* drop the libdevinfo property if this driver created it */
	have_prop = ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, MEM_CFG_PROP_NAME);
	if (have_prop == 1) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
		    MEM_CFG_PROP_NAME);
	}

	/* tear down the lists built for this MC and its layout data */
	mlayout_del(softsp->portid);
	if (softsp->memlayoutp != NULL) {
		kmem_free(softsp->memlayoutp, softsp->size);
	}

	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);

	/* with no MC left, the lookup hooks must no longer point at us */
	mutex_enter(&mcmutex);
	if (nmcs == 0) {
		if (&p2get_mem_unum)
			p2get_mem_unum = NULL;
		if (&p2get_mem_info)
			p2get_mem_info = NULL;
	}
	mutex_exit(&mcmutex);

	ddi_remove_minor_node(devi, NULL);
	ddi_soft_state_free(mcp, instance);

	return (DDI_SUCCESS);
}

/*
 * Open entry point.  Only character opens are accepted (EINVAL otherwise),
 * and only one open may be outstanding at a time: a second open fails with
 * EBUSY until mc_close() has run.
 */
/* ARGSUSED */
static int
mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int rv;

	if (otyp != OTYP_CHR)
		return (EINVAL);

	mutex_enter(&mcmutex);
	if (mc_is_open) {
		rv = EBUSY;
	} else {
		mc_is_open = 1;
		rv = 0;
	}
	mutex_exit(&mcmutex);

	return (rv);
}

/*
 * Close entry point.  Clears the open flag under mcmutex so that the device
 * can be opened again; always returns 0.
 */
/* ARGSUSED */
static int
mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
{
	int rv = 0;

	mutex_enter(&mcmutex);
	mc_is_open = 0;
	mutex_exit(&mcmutex);

	return (rv);
}

/*
 * ioctl entry point.
 *
 * cmd is one of MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK,
 * MCIOC_DEVGRP, MCIOC_CTRLCONF, MCIOC_CONTROL or MCIOC_ECFLUSH; any other
 * value returns EFAULT.  For MCIOC_ECFLUSH arg is itself a CPU id; for all
 * other commands arg is the user address of the command's structure, which
 * is read with copyin() and/or written with copyout().
 *
 * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL use
 * variable-length structures. If the element count supplied by the caller
 * is less than the count held in the kernel, the count in the caller's
 * structure is updated and EINVAL is returned so that the caller can
 * allocate enough space and retry.
 *
 * The driver's lists and counters are read while holding mcdatamutex.
 */

/* ARGSUSED */
static int
mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
	int *rval_p)
{
	size_t	size;
	struct mc_memconf mcmconf;
	struct mc_memory *mcmem, mcmem_in;
	struct mc_segment *mcseg, mcseg_in;
	struct mc_bank mcbank;
	struct mc_devgrp mcdevgrp;
	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
	struct mc_control *mccontrol, mccontrol_in;
	struct seg_info *seg = NULL;
	struct bank_info *bank = NULL;
	struct dgrp_info *dgrp = NULL;
	struct mctrl_info *mcport;
	mc_dlist_t *mctrl;
	int i, status = 0;
	cpu_t *cpu;

	switch (cmd) {
	/*
	 * output only: the overall counts and limits of the configuration.
	 */
	case MCIOC_MEMCONF:
		mutex_enter(&mcdatamutex);

		mcmconf.nmcs = nmcs;
		mcmconf.nsegments = nsegments;
		mcmconf.nbanks = maxbanks;
		mcmconf.ndevgrps = NDGRPS;
		mcmconf.ndevs = NDIMMS;
		mcmconf.len_dev = MAX_DEVLEN;
		mcmconf.xfer_size = TRANSFER_SIZE;

		mutex_exit(&mcdatamutex);

		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
			return (EFAULT);
		return (0);

	/*
	 * input: nsegments and allocate space for various length of segmentids
	 *
	 * return    0: size, number of segments, and all segment ids,
	 *		where global and local ids are identical.
	 *	EINVAL: if the given nsegments is less than that in kernel and
	 *		nsegments of struct will be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_MEM:
		if (copyin((void *)arg, &mcmem_in,
		    sizeof (struct mc_memory)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if (mcmem_in.nsegments < nsegments) {
			mcmem_in.nsegments = nsegments;
			if (copyout(&mcmem_in, (void *)arg,
			    sizeof (struct mc_memory)))
				status = EFAULT;
			else
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		/*
		 * NOTE(review): the (nsegments - 1) presumably accounts for
		 * one segmentids[] element already being part of
		 * struct mc_memory -- confirm against the header.
		 */
		size = sizeof (struct mc_memory) + (nsegments - 1) *
		    sizeof (mcmem->segmentids[0]);
		mcmem = kmem_zalloc(size, KM_SLEEP);

		mcmem->size = memsize;
		mcmem->nsegments = nsegments;
		seg = (struct seg_info *)seg_head;
		for (i = 0; i < nsegments; i++) {
			ASSERT(seg != NULL);
			mcmem->segmentids[i].globalid = seg->seg_node.id;
			mcmem->segmentids[i].localid = seg->seg_node.id;
			seg = (struct seg_info *)seg->seg_node.next;
		}
		mutex_exit(&mcdatamutex);

		if (copyout(mcmem, (void *)arg, size))
			status = EFAULT;

		kmem_free(mcmem, size);
		return (status);

	/*
	 * input: id, nbanks and allocate space for various length of bankids
	 *
	 * return    0: base, size, number of banks, and all bank ids,
	 *		where global id is unique of all banks and local id
	 *		is only unique for mc.
	 *	EINVAL: either id isn't found or if given nbanks is less than
	 *		that in kernel and nbanks of struct will be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_SEG:

		if (copyin((void *)arg, &mcseg_in,
		    sizeof (struct mc_segment)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
		    seg_head)) == NULL) {
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
			    "id %d\n", mcseg_in.id));
			mutex_exit(&mcdatamutex);
			/*
			 * NOTE(review): the comment above documents EINVAL
			 * for an unknown id, but EFAULT is returned here.
			 */
			return (EFAULT);
		}

		if (mcseg_in.nbanks < seg->nbanks) {
			mcseg_in.nbanks = seg->nbanks;
			if (copyout(&mcseg_in, (void *)arg,
			    sizeof (struct mc_segment)))
				status = EFAULT;
			else
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
		    sizeof (mcseg->bankids[0]);
		mcseg = kmem_zalloc(size, KM_SLEEP);

		mcseg->id = seg->seg_node.id;
		mcseg->ifactor = seg->ifactor;
		mcseg->base = seg->base;
		mcseg->size = seg->size;
		mcseg->nbanks = seg->nbanks;

		bank = seg->hb_inseg;

		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
		    seg->nbanks, seg, bank));

		/* walk the segment's own bank chain (hb_inseg / n_inseg) */
		i = 0;
		while (bank != NULL) {
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
			    i, bank->bank_node.id));
			mcseg->bankids[i].globalid = bank->bank_node.id;
			mcseg->bankids[i++].localid =
			    bank->local_id;
			bank = bank->n_inseg;
		}
		ASSERT(i == seg->nbanks);
		mutex_exit(&mcdatamutex);

		if (copyout(mcseg, (void *)arg, size))
			status = EFAULT;

		kmem_free(mcseg, size);
		return (status);

	/*
	 * input: id
	 *
	 * return    0: mask, match, size, and devgrpid,
	 *		where global id is unique of all devgrps and local id
	 *		is only unique for mc.
	 *	EINVAL: if id isn't found
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_BANK:
		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
			return (EFAULT);

		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));

		mutex_enter(&mcdatamutex);

		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
		    bank_head)) == NULL) {
			mutex_exit(&mcdatamutex);
			return (EINVAL);
		}

		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
		    bank->bank_node.id, bank, bank->valid));

		/*
		 * If (Physical Address & MASK) == MATCH, the Physical Address
		 * is located at this bank. The lower physical address bits
		 * are at [9-6].
		 */
		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
		mcbank.match = bank->lm << MADR_LPA_SHIFT;
		mcbank.size = bank->size;
		mcbank.devgrpid.globalid = bank->devgrp_id;
		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;

		mutex_exit(&mcdatamutex);

		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
			return (EFAULT);
		return (0);

	/*
	 * input:id and allocate space for various length of deviceids
	 *
	 * return    0: size and number of devices.
	 *	EINVAL: id isn't found
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_DEVGRP:

		if (copyin((void *)arg, &mcdevgrp,
		    sizeof (struct mc_devgrp)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
		    dgrp_head)) == NULL) {
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
			    "%d\n", mcdevgrp.id));
			mutex_exit(&mcdatamutex);
			return (EINVAL);
		}

		mcdevgrp.ndevices = dgrp->ndevices;
		mcdevgrp.size = dgrp->size;

		mutex_exit(&mcdatamutex);

		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
			status = EFAULT;

		return (status);

	/*
	 * input: nmcs and allocate space for various length of mcids
	 *
	 * return    0: number of mc, and all mcids,
	 *		where global and local ids are identical.
	 *	EINVAL: if the given nmcs is less than that in kernel and
	 *		nmcs of struct will be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_CTRLCONF:
		if (copyin((void *)arg, &mcctrlconf_in,
		    sizeof (struct mc_ctrlconf)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if (mcctrlconf_in.nmcs < nmcs) {
			mcctrlconf_in.nmcs = nmcs;
			if (copyout(&mcctrlconf_in, (void *)arg,
			    sizeof (struct mc_ctrlconf)))
				status = EFAULT;
			else
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		/*
		 * Cannot just use the size of the struct because of the various
		 * length struct
		 */
		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
		    sizeof (mcctrlconf->mcids[0]));
		mcctrlconf = kmem_zalloc(size, KM_SLEEP);

		mcctrlconf->nmcs = nmcs;

		/* Get all MC ids and add to mcctrlconf */
		mctrl = mctrl_head;
		i = 0;
		while (mctrl != NULL) {
			mcctrlconf->mcids[i].globalid = mctrl->id;
			mcctrlconf->mcids[i].localid = mctrl->id;
			i++;
			mctrl = mctrl->next;
		}
		ASSERT(i == nmcs);

		mutex_exit(&mcdatamutex);

		if (copyout(mcctrlconf, (void *)arg, size))
			status = EFAULT;

		kmem_free(mcctrlconf, size);
		return (status);

	/*
	 * input:id, ndevgrps and allocate space for various length of devgrpids
	 *
	 * return    0: number of devgrp, and all devgrpids,
	 *		is unique of all devgrps and local id is only unique
	 *		for mc.
	 *	EINVAL: either if id isn't found or if the given ndevgrps is
	 *		less than that in kernel and ndevgrps of struct will
	 *		be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_CONTROL:
		if (copyin((void *)arg, &mccontrol_in,
		    sizeof (struct mc_control)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
		    mctrl_head)) == NULL) {
			mutex_exit(&mcdatamutex);
			return (EINVAL);
		}

		/*
		 * mcport->ndevgrps zero means the Memory Controller is
		 * disabled.  In that case the count is still copied out, but
		 * status stays 0 unless the copyout itself fails.
		 */
		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
		    (mcport->ndevgrps == 0)) {
			mccontrol_in.ndevgrps = mcport->ndevgrps;
			if (copyout(&mccontrol_in, (void *)arg,
			    sizeof (struct mc_control)))
				status = EFAULT;
			else if (mcport->ndevgrps != 0)
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
		    sizeof (mccontrol->devgrpids[0]);
		mccontrol = kmem_zalloc(size, KM_SLEEP);

		mccontrol->id = mcport->mctrl_node.id;
		mccontrol->ndevgrps = mcport->ndevgrps;
		for (i = 0; i < mcport->ndevgrps; i++) {
			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
			mccontrol->devgrpids[i].localid =
			    mcport->devgrpids[i] % NDGRPS;
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
			    *(uint64_t *)&mccontrol->devgrpids[i]));
		}
		mutex_exit(&mcdatamutex);

		if (copyout(mccontrol, (void *)arg, size))
			status = EFAULT;

		kmem_free(mccontrol, size);
		return (status);

	/*
	 * input:id
	 *
	 * return    0: CPU flushed successfully.
	 *	EINVAL: the id wasn't found
	 */
	case MCIOC_ECFLUSH:
		mutex_enter(&cpu_lock);
		cpu = cpu_get((processorid_t)arg);
		mutex_exit(&cpu_lock);
		if (cpu == NULL)
			return (EINVAL);

		/*
		 * NOTE(review): cpu_lock is no longer held when the cross
		 * call is sent; confirm the target CPU cannot go away in
		 * between.
		 */
		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);

		return (0);

	default:
		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
		return (EFAULT);
	}
}

/*
 * Get Memory Address Decoding Registers and construct list.
 *
 * The MC/device-group/device lists are built first by mc_construct().  If
 * the controller has no memory-layout data it is disabled and nothing more
 * is built.  Otherwise each of the NBANKS decoding registers is read and
 * passed to mlayout_add(), and the register values are published as the
 * MEM_CFG_PROP_NAME property unless that property already exists.
 *
 * To work around Cheetah's restriction that the registers cannot be read
 * through the mapping when port id (MC registers on it) == cpu id (process
 * is running on it), the register is read with get_mcr() in that case.
 * Preemption is disabled around the read so that the check and the read
 * happen on the same CPU.
 *
 * Returns the last status from mlayout_add() (0 if it was never called);
 * the loop stops at the first -1.  nmcs is incremented only when that
 * status is 0.
 */
static int
mc_get_mcregs(struct mc_soft_state *softsp)
{
	int i;
	int err = 0;
	uint64_t madreg;
	/*
	 * There are NBANKS of madrs.  The array is zeroed so that entries
	 * not reached because mlayout_add() failed do not publish
	 * uninitialized stack contents in the property below.
	 */
	uint64_t ma_reg_array[NBANKS] = { 0 };

	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
	mc_construct(softsp->portid, softsp->memlayoutp);

	/*
	 * If memlayoutp is NULL, the Memory Controller is disabled, and
	 * doesn't need to create any bank and segment.
	 */
	if (softsp->memlayoutp == NULL)
		goto exit;

	/*
	 * Get the content of 4 Memory Address Decoding Registers, and
	 * construct lists of logical banks and segments.
	 */
	for (i = 0; i < NBANKS; i++) {
		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
		    "cpu=%d\n", softsp->mc_base, softsp->portid, CPU->cpu_id));

		kpreempt_disable();
		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
		else
			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
			    (i * REGOFFSET)));
		kpreempt_enable();

		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
		    "reg=0x%lx\n", softsp->memlayoutp, madreg));

		ma_reg_array[i] = madreg;

		if ((err = mlayout_add(softsp->portid, i, madreg,
		    softsp->memlayoutp)) == -1)
			break;
	}

	/*
	 * Create the logical bank property for this mc node. This
	 * property is an encoded array of the madr for each logical
	 * bank (there are NBANKS of these).
	 */
	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
	    MEM_CFG_PROP_NAME) != 1) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
		    DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
		    (caddr_t)&ma_reg_array, sizeof (ma_reg_array));
	}

exit:
	if (!err) {
		mutex_enter(&mcdatamutex);
		nmcs++;
		mutex_exit(&mcdatamutex);
	}
	return (err);
}

/*
 * A cache line is composed of four quadwords with the associated ECC, the
 * MTag along with its associated ECC. This is depicted below:
 *
 * |                    Data                    |   ECC   | Mtag |MTag ECC|
 *  127                                         0 8       0 2    0 3      0
 *
 * synd_code will be mapped as the following order to mc_get_mem_unum.
 *  143                                         16        7      4        0
 *
 * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
 *  575         432 431         288 287         144 143		   0
 *
 * dimm table: each bit of a cache line needs two bits to identify one of
 *      four dimms. So it needs 144 bytes (576 * 2 / 8). The content is in
 *      big-endian order, i.e. dimm_table[0] represents bits 572 to 575.
 *
 * pin table: each bit at a cache line needs one byte to present pin position,
 *      where max. is 230. So it needs 576 bytes. The order of table index is
 *      the same as bit position at a cache line, i.e. pin_table[0] presents
 *      for bit 0, Mtag ECC 0 of Quadword 3.
 *
 * This is a mapping from syndrome code to QuadWord Logical layout at Safari.
 * Referring to Figure 3-4, Excalibur Architecture Manual.
 * This table could be moved to cheetah.c if other platform teams agree with
 * the bit layout at QuadWord.
 */

/*
 * Indexed by syndrome code (0 .. QWORD_SIZE - 1).  mc_get_mem_unum() adds
 * the value found here to the quadword's base offset to obtain the bit
 * position within the cache line.
 */
static uint8_t qwordmap[] =
{
16,   17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
32,   33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
48,   49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
64,   65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
80,   81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
96,   97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
7,    8,   9,  10,  11,  12,  13,  14,  15,   4,   5,   6,   0,   1,   2,   3,
};

/* number of entries in qwordmap[]; also the number of bits per quadword */
#define	QWORD_SIZE	144

/*
 * Translate a syndrome code and physical address into a unum string.
 *
 * synd_code	single-bit syndrome code in 0..QWORD_SIZE-1, or -1 for a
 *		multi-bit error
 * paddr	physical address of the error
 * buf, buflen	caller's buffer for the resulting string
 * lenp		set to the length of the string stored in buf
 *
 * For a single-bit error the specific DIMM label is returned; for a
 * multi-bit error the labels of all NDIMMS DIMMs of the bank are returned,
 * separated by blanks.
 *
 * Returns 0 on success, EINVAL for an out-of-range synd_code, ENXIO if no
 * valid bank decodes paddr, and ENOSPC if the string does not fit in the
 * local unum buffer or in the caller's buffer.
 */
/* ARGSUSED */
static int
mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
{
	int i, upper_pa, lower_pa, dimmoffset;
	int quadword, pos_cacheline, position, index, idx4dimm;
	int qwlayout = synd_code;
	short offset, data;
	char unum[UNUM_NAMLEN];
	size_t unum_len;
	struct dimm_info *dimmp;
	struct pin_info *pinp;
	struct bank_info *bank;

	/*
	 * Enforce old Openboot requirement for synd code, either a single-bit
	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
	 */
	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
		return (EINVAL);

	unum[0] = '\0';

	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;

	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));

	/*
	 * Scan all logical banks to get one responding to the physical
	 * address. Then compute the index to look up dimm and pin tables
	 * to generate the unum.
	 */
	mutex_enter(&mcdatamutex);
	bank = (struct bank_info *)bank_head;
	while (bank != NULL) {
		int bankid, mcid, bankno_permc;

		bankid = bank->bank_node.id;
		bankno_permc = bankid % NBANKS;
		mcid = bankid / NBANKS;

		/*
		 * The Address Decoding logic decodes the different fields
		 * in the Memory Address Decoding register to determine
		 * whether a particular logic bank should respond to a
		 * physical address.
		 */
		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
			bank = (struct bank_info *)bank->bank_node.next;
			continue;
		}

		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;

		dimmp = (struct dimm_info *)bank->dimminfop;
		ASSERT(dimmp != NULL);

		/* synd code was range-checked above, so >= 0 is single-bit */
		if (qwlayout >= 0) {
			/*
			 * single-bit error handling, we can identify specific
			 * DIMM.
			 */

			pinp = (struct pin_info *)&dimmp->data[0];

			if (!dimmp->sym_flag)
				pinp++;

			quadword = (paddr & 0x3f) / 16;
			/* or quadword = (paddr >> 4) % 4; */
			pos_cacheline = ((3 - quadword) * 144) +
			    qwordmap[qwlayout];
			position = 575 - pos_cacheline;
			index = position * 2 / 8;
			offset = position % 4;

			/*
			 * Trade-off: We couldn't add pin number to
			 * unum string because the statistics are kept
			 * per dimm, not per pin.
			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
			 * pinp->pintable[pos_cacheline]);
			 */
			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
			    (uint_t)pinp->pintable[pos_cacheline]));
			data = pinp->dimmtable[index];
			idx4dimm = (data >> ((3 - offset) * 2)) & 3;

			/*
			 * strncpy() does not terminate the destination when
			 * the source fills it completely, so terminate it
			 * explicitly before anything calls strlen() on it.
			 */
			(void) strncpy(unum,
			    (char *)dimmp->label[dimmoffset + idx4dimm],
			    UNUM_NAMLEN);
			unum[UNUM_NAMLEN - 1] = '\0';
			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
			/*
			 * platform hook for adding label information to unum.
			 */
			mc_add_mem_unum_label(unum, mcid, bankno_permc,
			    idx4dimm);
		} else {
			char *p = unum;
			size_t res = UNUM_NAMLEN;

			/*
			 * multi-bit error handling, we can only identify
			 * bank of DIMMs.
			 */

			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
				(void) snprintf(p, res, "%s%s",
				    i == 0 ? "" : " ",
				    (char *)dimmp->label[dimmoffset + i]);
				res -= strlen(p);
				p += strlen(p);
			}

			/*
			 * platform hook for adding label information
			 * to unum.
			 */
			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
		}
		mutex_exit(&mcdatamutex);

		/*
		 * buflen is signed: reject non-positive sizes explicitly so
		 * that a negative value is not converted to a huge unsigned
		 * length in the comparison and in strncpy() below.
		 */
		unum_len = strlen(unum);
		if ((unum_len >= UNUM_NAMLEN) || (buflen <= 0) ||
		    (unum_len >= (size_t)buflen)) {
			return (ENOSPC);
		} else {
			(void) strncpy(buf, unum, buflen);
			*lenp = strlen(buf);
			return (0);
		}
	}	/* end of while loop for logic bank list */

	mutex_exit(&mcdatamutex);
	return (ENXIO);
}

/*
 * Look up the memory configuration that covers a physical address.
 *
 * The bank list is searched for the first valid logical bank whose address
 * decoding fields match paddr.  On a match the total memory size, the size
 * of the bank's segment, the size of the bank, the number of segments, the
 * number of banks in the segment and the id of the owning memory controller
 * are stored through the output pointers.
 *
 * Returns 0 on success, EINVAL if synd_code is outside -1..QWORD_SIZE-1,
 * EFAULT if the matching bank's segment cannot be found, and ENXIO if no
 * bank responds to paddr.
 */
static int
mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	int upper_pa, lower_pa;
	struct bank_info *bankp;
	struct seg_info *segp;

	if (synd_code < -1 || synd_code >= QWORD_SIZE)
		return (EINVAL);

	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;

	mutex_enter(&mcdatamutex);

	for (bankp = (struct bank_info *)bank_head; bankp != NULL;
	    bankp = (struct bank_info *)bankp->bank_node.next) {
		/*
		 * A bank responds only if it is valid and both the upper
		 * and the lower address fields match once the bits set in
		 * the corresponding mask (uk/lk) are ignored.
		 */
		if (!bankp->valid)
			continue;
		if (~(~(upper_pa ^ bankp->um) | bankp->uk))
			continue;
		if (~(~(lower_pa ^ bankp->lm) | bankp->lk))
			continue;

		/* found the bank; now find the segment it belongs to */
		segp = (struct seg_info *)mc_node_get(bankp->seg_id, seg_head);
		if (segp == NULL) {
			mutex_exit(&mcdatamutex);
			return (EFAULT);
		}

		*mem_sizep = memsize;
		*seg_sizep = segp->size;
		*bank_sizep = bankp->size;
		*segsp = nsegments;
		*banksp = segp->nbanks;
		*mcidp = bankp->bank_node.id / NBANKS;

		mutex_exit(&mcdatamutex);
		return (0);
	}

	mutex_exit(&mcdatamutex);
	return (ENXIO);
}

/*
 * Build the list entries for one memory controller, with all sizes set to
 * zero.  The lists are connected as follows:
 * Attached MC -> device group list -> device list (per devgrp).
 *
 * mc_id	id of the memory controller
 * dimminfop	memory-layout data supplying the device labels, or NULL for
 *		a disabled controller, in which case only the controller
 *		entry is created, with no device groups
 *
 * Entries that already exist are reported with a warning and left alone;
 * if the controller entry itself exists nothing is done.
 */
static void
mc_construct(int mc_id, void *dimminfop)
{
	struct dimm_info *layout = (struct dimm_info *)dimminfop;
	struct mctrl_info *mctrl;
	struct dgrp_info *dgrp;
	struct device_info *dev;
	int grp, slot, grp_id, dev_id;

	mutex_enter(&mcdatamutex);

	if (mc_node_get(mc_id, mctrl_head) != NULL) {
		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
		mutex_exit(&mcdatamutex);
		return;
	}

	/* the controller entry is put on its list only once it is complete */
	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
	mctrl->mctrl_node.id = mc_id;

	if (dimminfop == NULL) {
		/* disabled controller: no device groups */
		mctrl->ndevgrps = 0;
	} else {
		for (grp = 0; grp < NDGRPS; grp++) {
			grp_id = mc_id * NDGRPS + grp;
			mctrl->devgrpids[grp] = grp_id;

			if (mc_node_get(grp_id, dgrp_head) != NULL) {
				cmn_err(CE_WARN,
				    "mc_construct: devgrp %d exists\n",
				    grp_id);
				continue;
			}

			dgrp = kmem_zalloc(sizeof (struct dgrp_info),
			    KM_SLEEP);

			/* one device entry per DIMM of this group */
			for (slot = 0; slot < NDIMMS; slot++) {
				dev_id = grp_id * NDIMMS + slot;
				dgrp->deviceids[slot] = dev_id;

				if (mc_node_get(dev_id, device_head) != NULL) {
					cmn_err(CE_WARN,
					    "mc_construct: device %d "
					    "exists\n", dev_id);
					continue;
				}

				dev = kmem_zalloc(sizeof (struct device_info),
				    KM_SLEEP);
				dev->dev_node.id = dev_id;
				dev->size = 0;
				(void) strncpy(dev->label, (char *)
				    layout->label[grp * NDIMMS + slot],
				    MAX_DEVLEN);

				mc_node_add((mc_dlist_t *)dev, &device_head,
				    &device_tail);
			}

			dgrp->dgrp_node.id = grp_id;
			dgrp->ndevices = NDIMMS;
			dgrp->size = 0;
			mc_node_add((mc_dlist_t *)dgrp, &dgrp_head,
			    &dgrp_tail);
		}

		mctrl->ndevgrps = NDGRPS;
	}

	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
	mutex_exit(&mcdatamutex);
}

/*
 * Construct lists for Memory Configuration at logical viewpoint.
 *
 * Retrieve information from Memory Address Decoding Register and set up
 * bank and segment lists. Link bank to its corresponding device group, and
 * update size of device group and devices. Also connect bank to the segment.
 *
 * Memory Address Decoding Register
 * -------------------------------------------------------------------------
 * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
 * |-----------|----------|------|---------|-----|------|-----|-----|-------|
 * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
 * -------------------------------------------------------------------------
 *
 * Input:
 *	mc_id     - id of the memory controller the bank belongs to
 *	bank_no   - bank number within that memory controller
 *	reg       - raw Memory Address Decoding Register value of the bank
 *	dimminfop - opaque pointer recorded in the new bank_info entry
 *
 * Return: 0 on success. This includes the case where the bank is already on
 * the bank list (a warning is logged, nothing is changed) and the case where
 * the register is not valid (the bank is listed but not linked to a device
 * group or segment). -1 if the device group or one of its devices is not on
 * its list; in that case no list entry or size is modified.
 */

static int
mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
{
	int i, dmidx, idx;
	uint32_t ifactor;
	int status = 0;
	uint64_t size, base;
	struct seg_info *seg_curr;
	struct bank_info *bank_curr;
	struct dgrp_info *dgrp;
	struct device_info *devs[NDIMMS];
	union {
		struct {
			uint64_t valid	: 1;
			uint64_t resrv1	: 10;
			uint64_t uk	: 12;
			uint64_t resrv2	: 4;
			uint64_t um	: 17;
			uint64_t resrv3	: 2;
			uint64_t lk	: 4;
			uint64_t resrv4	: 2;
			uint64_t lm	: 4;
			uint64_t resrv5	: 8;
		} _s;
		uint64_t madreg;
	} mcreg;

	mcreg.madreg = reg;

	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));

	/* add the entry on bank_info list */
	idx = mc_id * NBANKS + bank_no;

	mutex_enter(&mcdatamutex);
	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
	    != NULL) {
		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
		goto exit;
	}

	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
	bank_curr->bank_node.id = idx;
	bank_curr->valid = mcreg._s.valid;
	bank_curr->dimminfop = dimminfop;

	/* An invalid bank is only recorded; it contributes no memory. */
	if (!mcreg._s.valid) {
		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
		goto exit;
	}

	/*
	 * size of a logical bank = size of segment / interleave factor
	 * This formula works not only for the regular configuration,
	 * i.e. the number of banks in a segment equals the max
	 * interleave factor, but also for special cases, say 3 bank
	 * interleave. One bank is 2 way interleave and other two are
	 * 4 way. So the sizes of banks are size of segment/2 and /4
	 * respectively.
	 *
	 * The multiplication is widened to 64 bits explicitly: the narrow
	 * bit-field operand may be promoted to int, and the product can be
	 * as large as 2^36.
	 */
	ifactor = (mcreg._s.lk ^ 0xF) + 1;
	size = ((uint64_t)((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
	base = mcreg._s.um & ~mcreg._s.uk;
	base <<= MADR_UPA_SHIFT;

	bank_curr->uk = mcreg._s.uk;
	bank_curr->um = mcreg._s.um;
	bank_curr->lk = mcreg._s.lk;
	bank_curr->lm = mcreg._s.lm;
	bank_curr->size = size;

	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
	"lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));

	/* find the device group this bank belongs to */
	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
		/* all available dgrp should be linked at mc_construct */
		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
		kmem_free(bank_curr, sizeof (struct bank_info));
		status = -1;
		goto exit;
	}

	/*
	 * Look up every device of the group before touching any size, so
	 * that a missing device leaves the lists exactly as they were.
	 * All available devices should have been linked at mc_construct.
	 */
	for (i = 0; i < NDIMMS; i++) {
		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
		devs[i] = (struct device_info *)mc_node_get(dmidx,
		    device_head);
		if (devs[i] == NULL) {
			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
			    dmidx);
			kmem_free(bank_curr, sizeof (struct bank_info));
			status = -1;
			goto exit;
		}
	}

	/* connect the entry and update the size on dgrp_info list */
	bank_curr->devgrp_id = idx;
	dgrp->size += size;

	/* Update the size of entry on device_info list */
	for (i = 0; i < NDIMMS; i++) {
		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
		dgrp->deviceids[i] = dmidx;

		devs[i]->size += (size / NDIMMS);

		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
		    dmidx, size));
	}

	/*
	 * Get the segment by matching the base address, link this bank
	 * to the segment. If not matched, allocate a new segment and
	 * add it at segment list.
	 */
	if ((seg_curr = seg_match_base(base)) != NULL) {
		seg_curr->nbanks++;
		seg_curr->size += size;
		/* the segment keeps the largest interleave factor seen */
		if (ifactor > seg_curr->ifactor)
			seg_curr->ifactor = ifactor;
		bank_curr->seg_id = seg_curr->seg_node.id;
	} else {
		seg_curr = kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
		bank_curr->seg_id = seg_id;
		seg_curr->seg_node.id = seg_id++;
		seg_curr->base = base;
		seg_curr->size = size;
		seg_curr->nbanks = 1;
		seg_curr->ifactor = ifactor;
		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);

		nsegments++;
	}

	/* Get the local id of bank which is only unique per segment. */
	bank_curr->local_id = seg_curr->nbanks - 1;

	/* add bank at the end of the list; not sorted by bankid */
	if (seg_curr->hb_inseg != NULL) {
		bank_curr->p_inseg = seg_curr->tb_inseg;
		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
		seg_curr->tb_inseg->n_inseg = bank_curr;
		seg_curr->tb_inseg = bank_curr;
	} else {
		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
	}
	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
	    seg_curr->seg_node.id));

	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);

	/* keep the global totals in step with the lists */
	memsize += size;
	if (seg_curr->nbanks > maxbanks)
		maxbanks = seg_curr->nbanks;

exit:
	mutex_exit(&mcdatamutex);
	return (status);
}

/*
 * Delete nodes related to the given MC on mc, device group, device,
 * and bank lists. Moreover, delete corresponding segment if its connected
 * banks are all removed.
 */
static void
mlayout_del(int mc_id)
{
	int i, j, dgrpid, devid, bankid, ndevgrps;
	struct seg_info *seg;
	struct bank_info *bank_curr;
	struct mctrl_info *mctrl;
	mc_dlist_t *dgrp_ptr;
	mc_dlist_t *dev_ptr;
	uint64_t base;

	mutex_enter(&mcdatamutex);

	/* delete mctrl_info */
	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
	    NULL) {
		ndevgrps = mctrl->ndevgrps;
		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
		kmem_free(mctrl, sizeof (struct mctrl_info));
		nmcs--;

		/*
		 * There is no other list left for disabled MC.
		 */
		if (ndevgrps == 0) {
			mutex_exit(&mcdatamutex);
			return;
		}
	} else
		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");

	/* Delete device groups and devices of the detached MC */
	for (i = 0; i < NDGRPS; i++) {
		dgrpid = mc_id * NDGRPS + i;
		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
			continue;
		}

		for (j = 0; j < NDIMMS; j++) {
			devid = dgrpid * NDIMMS + j;
			if (dev_ptr = mc_node_get(devid, device_head)) {
				mc_node_del(dev_ptr, &device_head,
				    &device_tail);
				kmem_free(dev_ptr, sizeof (struct device_info));
			} else {
				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
				    devid);
			}
		}

		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
	}

	/* Delete banks and segments if it has no bank */
	for (i = 0; i < NBANKS; i++) {
		bankid = mc_id * NBANKS + i;
		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
		    bank_head))) {
			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
			continue;
		}

		if (bank_curr->valid) {
			base = bank_curr->um & ~bank_curr->uk;
			base <<= MADR_UPA_SHIFT;
			bank_curr->valid = 0;
			memsize -= bank_curr->size;

			/* Delete bank at segment and segment if no bank left */
			if (!(seg = seg_match_base(base))) {
				cmn_err(CE_WARN, "mlayout_del: no seg\n");
				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
				    &bank_tail);
				kmem_free(bank_curr, sizeof (struct bank_info));
				continue;
			}

			/* update the bank list at the segment */
			if (bank_curr->n_inseg == NULL) {
				/* node is at the tail of list */
				seg->tb_inseg = bank_curr->p_inseg;
			} else {
				bank_curr->n_inseg->p_inseg =
				    bank_curr->p_inseg;
			}

			if (bank_curr->p_inseg == NULL) {
				/* node is at the head of list */
				seg->hb_inseg = bank_curr->n_inseg;
			} else {
				bank_curr->p_inseg->n_inseg =
				    bank_curr->n_inseg;
			}

			seg->nbanks--;
			seg->size -= bank_curr->size;

			if (seg->nbanks == 0) {
				mc_node_del((mc_dlist_t *)seg, &seg_head,
				    &seg_tail);
				kmem_free(seg, sizeof (struct seg_info));
				nsegments--;
			}

		}
		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
		kmem_free(bank_curr, sizeof (struct bank_info));
	}	/* end of for loop for four banks */

	mutex_exit(&mcdatamutex);
}

/*
 * Search the segment list, starting at seg_head, for the segment with the
 * given base address.
 *
 * Input: base address
 * Return: pointer to the matching segment, or NULL if there is none.
 *
 * The list is walked without taking any lock here; the callers in this
 * file hold mcdatamutex around the call.
 */
static struct seg_info *
seg_match_base(u_longlong_t base)
{
	/* plain automatic cursor: no state is kept between calls */
	struct seg_info *seg_ptr;

	for (seg_ptr = (struct seg_info *)seg_head; seg_ptr != NULL;
	    seg_ptr = (struct seg_info *)seg_ptr->seg_node.next) {
		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
		    seg_ptr->base, base));
		if (seg_ptr->base == base)
			break;
	}
	return (seg_ptr);
}

/*
 * mc_dlist is a doubly linked list node holding a unique id and pointers
 * to the next and previous nodes. seg_info, bank_info, dgrp_info,
 * device_info and mctrl_info all have it as their first member so that
 * they can share the add, del and get operations.
 *
 * A new node is appended at the tail; the list is not sorted.
 *
 * Input: pointer to the node to be added, head and tail of the list
 */

static void
mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
{
	mc_dlist_t *last;

	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
	    node->id, *head, *tail));

	/* Empty list: the node becomes both head and tail. */
	if (*head == NULL) {
		node->prev = NULL;
		node->next = NULL;
		*head = node;
		*tail = node;
		return;
	}

	/* Otherwise hook the node in behind the current tail. */
	last = *tail;
	node->prev = last;
	node->next = last->next;
	last->next = node;
	*tail = node;
}

/*
 * Unlink a node from its list. The node itself is not freed and its own
 * next/prev pointers are left untouched.
 *
 * Input: pointer to the node to be removed, head and tail of the list
 *
 * The node may sit at any of these positions:
 * 1. At the tail of the list
 * 2. At the head of the list
 * 3. At both head and tail, i.e. it is the only node left
 * 4. In the middle of the list
 */

static void
mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
{
	mc_dlist_t *before = node->prev;
	mc_dlist_t *after = node->next;

	/* Successor side: fix its back link, or move the tail. */
	if (after != NULL)
		after->prev = before;
	else
		*tail = before;

	/* Predecessor side: fix its forward link, or move the head. */
	if (before != NULL)
		before->next = after;
	else
		*head = after;
}

/*
 * Walk the list from its head looking for the node with the given id.
 *
 * Input: id to look for and the head of the list
 * Return: pointer to the first node whose id matches, or NULL if the
 * list holds no such node (or is empty).
 */
static mc_dlist_t *
mc_node_get(int id, mc_dlist_t *head)
{
	mc_dlist_t *cur;

	for (cur = head; cur != NULL; cur = cur->next) {
		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
		    cur->id, id));
		if (cur->id == id)
			return (cur);
	}
	return (NULL);
}

/*
 * mc-us3 driver allows a platform to add extra label
 * information to the unum string. If a platform implements a
 * kernel function called plat_add_mem_unum_label() it will be
 * executed. This would typically be implemented in the platmod.
 */
static void
mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
{
	if (&plat_add_mem_unum_label)
		plat_add_mem_unum_label(buf, mcid, bank, dimm);
}