xref: /titanic_52/usr/src/uts/sun4u/io/mc-us3i.c (revision 8bab47abcb471dffa36ddbf409a8ef5303398ddf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/conf.h>
28 #include <sys/ddi.h>
29 #include <sys/stat.h>
30 #include <sys/sunddi.h>
31 #include <sys/ddi_impldefs.h>
32 #include <sys/obpdefs.h>
33 #include <sys/cmn_err.h>
34 #include <sys/errno.h>
35 #include <sys/kmem.h>
36 #include <sys/open.h>
37 #include <sys/thread.h>
38 #include <sys/cpuvar.h>
39 #include <sys/x_call.h>
40 #include <sys/debug.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ivintr.h>
43 #include <sys/intr.h>
44 #include <sys/intreg.h>
45 #include <sys/autoconf.h>
46 #include <sys/modctl.h>
47 #include <sys/spl.h>
48 #include <sys/async.h>
49 #include <sys/mc.h>
50 #include <sys/mc-us3i.h>
51 #include <sys/note.h>
52 #include <sys/cpu_module.h>
53 
54 /*
55  * pm-hardware-state value
56  */
57 #define	NO_SUSPEND_RESUME	"no-suspend-resume"
58 
59 /*
60  * Function prototypes
61  */
62 
/* Character-device entry points, installed in mc_cb_ops below. */
static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
/* Autoconfiguration entry points, installed in mc_ops below. */
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
68 
69 /*
70  * Configuration data structures
71  */
/*
 * Character/block entry point table.  Only open, close and ioctl are
 * implemented by this driver; every other slot is filled with a generic
 * nulldev/nodev/nochpoll stub.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};
92 
/*
 * Device operations table.  attach and detach are the only driver-specific
 * entries; the character entry points are reached through mc_cb_ops.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_no_info,			/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev,			/* power */
	ddi_quiesce_not_needed,			/* quiesce */
};
107 
108 /*
109  * Driver globals
110  */
static void *mcp;		/* soft state anchor (ddi_soft_state_init) */
static int nmcs = 0;		/* MC count; mc_open fails with ENXIO if 0 */
static int seg_id;		/* id of the segment currently being built */
static int nsegments;		/* number of segments on the segment list */
static uint64_t	memsize;	/* sum of the sizes of all banks added */

/* NOTE(review): presumably the flag word tested by DPRINTF - confirm */
static uint_t	mc_debug = 0;

static int getreg;		/* set once memory "reg" has been read */
static int nregs;		/* number of entries in reg_info */
struct memory_reg_info *reg_info;	/* "reg" property of the memory node */

/* Head/tail pointers of the lists holding each kind of node. */
static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
static mc_dlist_t *device_head, *device_tail;

/* Held across layout construction/teardown in attach and detach. */
static kmutex_t	mcmutex;
/* Held by mc_ioctl while it reads the lists and counters above. */
static kmutex_t	mcdatamutex;
129 
130 extern struct mod_ops mod_driverops;
131 
/* Loadable-module description of this driver, referenced by modlinkage. */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"Memory-controller",		/* module name */
	&mc_ops,			/* driver ops */
};
137 
/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
143 
/* Layout construction and teardown helpers. */
static int mc_get_memory_reg_info(struct mc_soft_state *softsp);
static void mc_construct(struct mc_soft_state *softsp);
static void mc_delete(int mc_id);
/* Generic list primitives shared by all node types. */
static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void *mc_node_get(int id, mc_dlist_t *head);
static void mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm);
/*
 * Installed into the p2get_mem_unum / p2get_mem_info hooks by mc_attach
 * when those weak symbols are present.
 */
static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
    int buflen, int *lenp);
static int mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);
156 
157 #pragma weak p2get_mem_unum
158 #pragma weak p2get_mem_info
159 #pragma weak plat_add_mem_unum_label
160 
161 /* For testing only */
/*
 * NOTE(review): the fields appear to mirror the arguments of
 * mc_get_mem_unum() (syndrome code, physical address, unum buffer and
 * returned length); no user of this structure is visible here - confirm.
 */
struct test_unum {
	int		synd_code;
	uint64_t	paddr;
	char 		unum[UNUM_NAMLEN];
	int		len;
};
168 
169 /*
170  * These are the module initialization routines.
171  */
172 
173 int
174 _init(void)
175 {
176 	int error;
177 
178 	if ((error = ddi_soft_state_init(&mcp,
179 	    sizeof (struct mc_soft_state), 1)) != 0)
180 		return (error);
181 
182 	error =  mod_install(&modlinkage);
183 	if (error == 0) {
184 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
185 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
186 	}
187 
188 	return (error);
189 }
190 
191 int
192 _fini(void)
193 {
194 	int error;
195 
196 	if ((error = mod_remove(&modlinkage)) != 0)
197 		return (error);
198 
199 	ddi_soft_state_fini(&mcp);
200 	mutex_destroy(&mcmutex);
201 	mutex_destroy(&mcdatamutex);
202 	return (0);
203 }
204 
205 int
206 _info(struct modinfo *modinfop)
207 {
208 	return (mod_info(&modlinkage, modinfop));
209 }
210 
211 static int
212 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
213 {
214 	struct mc_soft_state *softsp;
215 	struct dimm_info *dimminfop;
216 	int instance, len, err;
217 	int mcreg1_len;
218 
219 	switch (cmd) {
220 	case DDI_ATTACH:
221 		break;
222 
223 	case DDI_RESUME:
224 		return (DDI_SUCCESS);
225 
226 	default:
227 		return (DDI_FAILURE);
228 	}
229 
230 	instance = ddi_get_instance(devi);
231 
232 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
233 		return (DDI_FAILURE);
234 
235 	softsp = ddi_get_soft_state(mcp, instance);
236 
237 	/* Set the dip in the soft state */
238 	softsp->dip = devi;
239 
240 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
241 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
242 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property\n",
243 		    instance, "portid"));
244 		goto bad;
245 	}
246 
247 	DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: mc %d portid %d, cpuid %d\n",
248 	    instance, softsp->portid, CPU->cpu_id));
249 
250 	/* Get the content of Memory Control Register I from obp */
251 	mcreg1_len = sizeof (uint64_t);
252 	if ((ddi_getlongprop_buf(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
253 	    "memory-control-register-1", (caddr_t)&(softsp->mcreg1),
254 	    &mcreg1_len) == DDI_PROP_SUCCESS) &&
255 	    (mcreg1_len == sizeof (uint64_t))) {
256 		softsp->mcr_read_ok = 1;
257 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d from obp: Reg1: 0x%lx\n",
258 		    instance, softsp->mcreg1));
259 	}
260 
261 	/* attach fails if mcreg1 cannot be accessed */
262 	if (!softsp->mcr_read_ok) {
263 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get mcreg1\n",
264 		    instance));
265 		goto bad;
266 	}
267 
268 	/* nothing to suspend/resume here */
269 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
270 	    "pm-hardware-state", NO_SUSPEND_RESUME,
271 	    sizeof (NO_SUSPEND_RESUME));
272 
273 	/*
274 	 * Get the label of dimms and pin routing information from the
275 	 * memory-layout property of the memory controller.
276 	 */
277 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
278 	    "memory-layout", (caddr_t)&dimminfop, &len);
279 	if (err == DDI_PROP_SUCCESS && dimminfop->table_width == 1) {
280 		/* Set the pointer and size of property in the soft state */
281 		softsp->memlayoutp = dimminfop;
282 		softsp->memlayoutlen = len;
283 	} else {
284 		/*
285 		 * memory-layout property was not found or some other
286 		 * error occured, plat_get_mem_unum() will not work
287 		 * for this mc.
288 		 */
289 		softsp->memlayoutp = NULL;
290 		softsp->memlayoutlen = 0;
291 		DPRINTF(MC_ATTACH_DEBUG,
292 		    ("mc %d: missing or unsupported memory-layout property\n",
293 		    instance));
294 	}
295 
296 	mutex_enter(&mcmutex);
297 
298 	/* Get the physical segments from memory/reg, just once for all MC */
299 	if (!getreg) {
300 		if (mc_get_memory_reg_info(softsp) != 0) {
301 			goto bad1;
302 		}
303 		getreg = 1;
304 	}
305 
306 	/* Construct the physical and logical layout of the MC */
307 	mc_construct(softsp);
308 
309 	if (nmcs == 1) {
310 		if (&p2get_mem_unum)
311 			p2get_mem_unum = mc_get_mem_unum;
312 		if (&p2get_mem_info)
313 			p2get_mem_info = mc_get_mem_info;
314 	}
315 
316 	if (ddi_create_minor_node(devi, "mc-us3i", S_IFCHR, instance,
317 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
318 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
319 		    " failed \n"));
320 		goto bad1;
321 	}
322 	mutex_exit(&mcmutex);
323 
324 	ddi_report_dev(devi);
325 	return (DDI_SUCCESS);
326 
327 bad1:
328 	/* release all allocated data struture for this MC */
329 	mc_delete(softsp->portid);
330 	mutex_exit(&mcmutex);
331 	if (softsp->memlayoutp != NULL)
332 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
333 
334 bad:
335 	cmn_err(CE_WARN, "mc-us3i: attach failed for instance %d\n", instance);
336 	ddi_soft_state_free(mcp, instance);
337 	return (DDI_FAILURE);
338 }
339 
340 /* ARGSUSED */
341 static int
342 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
343 {
344 	int instance;
345 	struct mc_soft_state *softsp;
346 
347 	/* get the instance of this devi */
348 	instance = ddi_get_instance(devi);
349 
350 	/* get the soft state pointer for this device node */
351 	softsp = ddi_get_soft_state(mcp, instance);
352 
353 	switch (cmd) {
354 	case DDI_SUSPEND:
355 		return (DDI_SUCCESS);
356 
357 	case DDI_DETACH:
358 		break;
359 
360 	default:
361 		return (DDI_FAILURE);
362 	}
363 
364 	DPRINTF(MC_DETACH_DEBUG, ("mc %d DETACH: portid %d\n", instance,
365 	    softsp->portid));
366 
367 	mutex_enter(&mcmutex);
368 
369 	/* release all allocated data struture for this MC */
370 	mc_delete(softsp->portid);
371 
372 	if (softsp->memlayoutp != NULL)
373 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
374 
375 	if (nmcs == 0) {
376 		if (&p2get_mem_unum)
377 			p2get_mem_unum = NULL;
378 		if (&p2get_mem_info)
379 			p2get_mem_info = NULL;
380 	}
381 
382 	mutex_exit(&mcmutex);
383 
384 	ddi_remove_minor_node(devi, NULL);
385 	/* free up the soft state */
386 	ddi_soft_state_free(mcp, instance);
387 
388 	return (DDI_SUCCESS);
389 }
390 
391 /* ARGSUSED */
392 static int
393 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
394 {
395 	int status = 0;
396 
397 	/* verify that otyp is appropriate */
398 	if (otyp != OTYP_CHR) {
399 		return (EINVAL);
400 	}
401 
402 	mutex_enter(&mcmutex);
403 	/* At least one attached? */
404 	if (nmcs == 0) {
405 		status = ENXIO;
406 	}
407 	mutex_exit(&mcmutex);
408 
409 	return (status);
410 }
411 
412 /* ARGSUSED */
413 static int
414 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
415 {
416 	return (0);
417 }
418 
/*
 * cmd is one of MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK,
 * MCIOC_DEVGRP, MCIOC_CTRLCONF, MCIOC_CONTROL or MCIOC_ECFLUSH.
 *
 * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL return
 * variable-length structures. If the element count supplied by the caller
 * is smaller than the count in the kernel, the count in the caller's
 * structure is updated and EINVAL is returned so that the caller can
 * allocate enough space and retry.
 */
429 
430 /* ARGSUSED */
431 static int
432 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
433 	int *rval_p)
434 {
435 	size_t	size;
436 	struct mc_memconf mcmconf;
437 	struct mc_memory *mcmem, mcmem_in;
438 	struct mc_segment *mcseg, mcseg_in;
439 	struct mc_bank mcbank;
440 	struct mc_devgrp mcdevgrp;
441 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
442 	struct mc_control *mccontrol, mccontrol_in;
443 	struct seg_info *seg = NULL;
444 	struct bank_info *bank = NULL;
445 	struct dgrp_info *dgrp = NULL;
446 	struct mctrl_info *mcport;
447 	mc_dlist_t *mctrl;
448 	int i, status = 0;
449 	cpu_t *cpu;
450 
451 	switch (cmd) {
452 	case MCIOC_MEMCONF:
453 		mutex_enter(&mcdatamutex);
454 
455 		mcmconf.nmcs = nmcs;
456 		mcmconf.nsegments = nsegments;
457 		mcmconf.nbanks = NLOGBANKS_PER_SEG;
458 		mcmconf.ndevgrps = NDGRPS_PER_MC;
459 		mcmconf.ndevs = NDIMMS_PER_DGRP;
460 		mcmconf.len_dev = MAX_DEVLEN;
461 		mcmconf.xfer_size = TRANSFER_SIZE;
462 
463 		mutex_exit(&mcdatamutex);
464 
465 		if (copyout(&mcmconf, (void *)arg, sizeof (mcmconf)))
466 			return (EFAULT);
467 		return (0);
468 
469 	/*
470 	 * input: nsegments and allocate space for various length of segmentids
471 	 *
472 	 * return    0: size, number of segments, and all segment ids,
473 	 *		where glocal and local ids are identical.
474 	 *	EINVAL: if the given nsegments is less than that in kernel and
475 	 *		nsegments of struct will be updated.
476 	 *	EFAULT: if other errors in kernel.
477 	 */
478 	case MCIOC_MEM:
479 		if (copyin((void *)arg, &mcmem_in, sizeof (mcmem_in)) != 0)
480 			return (EFAULT);
481 
482 		mutex_enter(&mcdatamutex);
483 		if (mcmem_in.nsegments < nsegments) {
484 			mcmem_in.nsegments = nsegments;
485 			mutex_exit(&mcdatamutex);
486 			if (copyout(&mcmem_in, (void *)arg, sizeof (mcmem_in)))
487 				status = EFAULT;
488 			else
489 				status = EINVAL;
490 
491 			return (status);
492 		}
493 
494 		size = sizeof (*mcmem) + (nsegments - 1) *
495 		    sizeof (mcmem->segmentids[0]);
496 		mcmem = kmem_zalloc(size, KM_SLEEP);
497 
498 		mcmem->size = memsize;
499 		mcmem->nsegments = nsegments;
500 		seg = (struct seg_info *)seg_head;
501 		for (i = 0; i < nsegments; i++) {
502 			ASSERT(seg != NULL);
503 			mcmem->segmentids[i].globalid = seg->seg_node.id;
504 			mcmem->segmentids[i].localid = seg->seg_node.id;
505 			seg = (struct seg_info *)seg->seg_node.next;
506 		}
507 		mutex_exit(&mcdatamutex);
508 
509 		if (copyout(mcmem, (void *)arg, size))
510 			status = EFAULT;
511 
512 		kmem_free(mcmem, size);
513 		return (status);
514 
515 	/*
516 	 * input: id, nbanks and allocate space for various length of bankids
517 	 *
518 	 * return    0: base, size, number of banks, and all bank ids,
519 	 *		where global id is unique of all banks and local id
520 	 *		is only unique for mc.
521 	 *	EINVAL: either id isn't found or if given nbanks is less than
522 	 *		that in kernel and nbanks of struct will be updated.
523 	 *	EFAULT: if other errors in kernel.
524 	 */
525 	case MCIOC_SEG:
526 
527 		if (copyin((void *)arg, &mcseg_in, sizeof (mcseg_in)) != 0)
528 			return (EFAULT);
529 
530 		mutex_enter(&mcdatamutex);
531 		if ((seg = mc_node_get(mcseg_in.id, seg_head)) == NULL) {
532 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
533 			    "id %d\n", mcseg_in.id));
534 			mutex_exit(&mcdatamutex);
535 			return (EFAULT);
536 		}
537 
538 		if (mcseg_in.nbanks < seg->nbanks) {
539 			mcseg_in.nbanks = seg->nbanks;
540 			mutex_exit(&mcdatamutex);
541 			if (copyout(&mcseg_in, (void *)arg, sizeof (mcseg_in)))
542 				status = EFAULT;
543 			else
544 				status = EINVAL;
545 
546 			return (status);
547 		}
548 
549 		size = sizeof (*mcseg) + (seg->nbanks - 1) *
550 		    sizeof (mcseg->bankids[0]);
551 		mcseg = kmem_zalloc(size, KM_SLEEP);
552 
553 		mcseg->id = seg->seg_node.id;
554 		mcseg->ifactor = seg->ifactor;
555 		mcseg->base = seg->base;
556 		mcseg->size = seg->size;
557 		mcseg->nbanks = seg->nbanks;
558 
559 		bank = seg->head;
560 
561 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg %p bank %p\n",
562 		    seg->nbanks, (void *) seg, (void *) bank));
563 
564 		i = 0;
565 		while (bank != NULL) {
566 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
567 			    i, bank->bank_node.id));
568 			mcseg->bankids[i].globalid = bank->bank_node.id;
569 			mcseg->bankids[i++].localid = bank->local_id;
570 			bank = bank->next;
571 		}
572 		ASSERT(i == seg->nbanks);
573 		mutex_exit(&mcdatamutex);
574 
575 		if (copyout(mcseg, (void *)arg, size))
576 			status = EFAULT;
577 
578 		kmem_free(mcseg, size);
579 		return (status);
580 
581 	/*
582 	 * input: id
583 	 *
584 	 * return    0: mask, match, size, and devgrpid,
585 	 *		where global id is unique of all devgrps and local id
586 	 *		is only unique for mc.
587 	 *	EINVAL: if id isn't found
588 	 *	EFAULT: if other errors in kernel.
589 	 */
590 	case MCIOC_BANK:
591 		if (copyin((void *)arg, &mcbank, sizeof (mcbank)) != 0)
592 			return (EFAULT);
593 
594 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
595 
596 		mutex_enter(&mcdatamutex);
597 
598 		if ((bank = mc_node_get(mcbank.id, bank_head)) == NULL) {
599 			mutex_exit(&mcdatamutex);
600 			return (EINVAL);
601 		}
602 
603 		mcbank.mask = bank->mask;
604 		mcbank.match = bank->match;
605 		mcbank.size = bank->size;
606 		mcbank.devgrpid.globalid = bank->devgrp_id;
607 		mcbank.devgrpid.localid =
608 		    bank->bank_node.id % NLOGBANKS_PER_SEG;
609 
610 		mutex_exit(&mcdatamutex);
611 
612 		if (copyout(&mcbank, (void *)arg, sizeof (mcbank)))
613 			return (EFAULT);
614 		return (0);
615 
616 	/*
617 	 * input:id and allocate space for various length of deviceids
618 	 *
619 	 * return    0: size and number of devices.
620 	 *	EINVAL: id isn't found
621 	 *	EFAULT: if other errors in kernel.
622 	 */
623 	case MCIOC_DEVGRP:
624 
625 		if (copyin((void *)arg, &mcdevgrp, sizeof (mcdevgrp)) != 0)
626 			return (EFAULT);
627 
628 		mutex_enter(&mcdatamutex);
629 		if ((dgrp = mc_node_get(mcdevgrp.id, dgrp_head)) == NULL) {
630 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
631 			    "%d\n", mcdevgrp.id));
632 			mutex_exit(&mcdatamutex);
633 			return (EINVAL);
634 		}
635 
636 		mcdevgrp.ndevices = dgrp->ndevices;
637 		mcdevgrp.size = dgrp->size;
638 
639 		mutex_exit(&mcdatamutex);
640 
641 		if (copyout(&mcdevgrp, (void *)arg, sizeof (mcdevgrp)))
642 			status = EFAULT;
643 
644 		return (status);
645 
646 	/*
647 	 * input: nmcs and allocate space for various length of mcids
648 	 *
649 	 * return    0: number of mc, and all mcids,
650 	 *		where glocal and local ids are identical.
651 	 *	EINVAL: if the given nmcs is less than that in kernel and
652 	 *		nmcs of struct will be updated.
653 	 *	EFAULT: if other errors in kernel.
654 	 */
655 	case MCIOC_CTRLCONF:
656 		if (copyin((void *)arg, &mcctrlconf_in,
657 		    sizeof (mcctrlconf_in)) != 0)
658 			return (EFAULT);
659 
660 		mutex_enter(&mcdatamutex);
661 		if (mcctrlconf_in.nmcs < nmcs) {
662 			mcctrlconf_in.nmcs = nmcs;
663 			mutex_exit(&mcdatamutex);
664 			if (copyout(&mcctrlconf_in, (void *)arg,
665 			    sizeof (mcctrlconf_in)))
666 				status = EFAULT;
667 			else
668 				status = EINVAL;
669 
670 			return (status);
671 		}
672 
673 		/*
674 		 * Cannot just use the size of the struct because of the various
675 		 * length struct
676 		 */
677 		size = sizeof (*mcctrlconf) + ((nmcs - 1) *
678 		    sizeof (mcctrlconf->mcids[0]));
679 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
680 
681 		mcctrlconf->nmcs = nmcs;
682 
683 		/* Get all MC ids and add to mcctrlconf */
684 		mctrl = mctrl_head;
685 		i = 0;
686 		while (mctrl != NULL) {
687 			mcctrlconf->mcids[i].globalid = mctrl->id;
688 			mcctrlconf->mcids[i].localid = mctrl->id;
689 			i++;
690 			mctrl = mctrl->next;
691 		}
692 		ASSERT(i == nmcs);
693 
694 		mutex_exit(&mcdatamutex);
695 
696 		if (copyout(mcctrlconf, (void *)arg, size))
697 			status = EFAULT;
698 
699 		kmem_free(mcctrlconf, size);
700 		return (status);
701 
702 	/*
703 	 * input:id, ndevgrps and allocate space for various length of devgrpids
704 	 *
705 	 * return    0: number of devgrp, and all devgrpids,
706 	 *		is unique of all devgrps and local id is only unique
707 	 *		for mc.
708 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
709 	 *		less than that in kernel and ndevgrps of struct will
710 	 *		be updated.
711 	 *	EFAULT: if other errors in kernel.
712 	 */
713 	case MCIOC_CONTROL:
714 		if (copyin((void *)arg, &mccontrol_in,
715 		    sizeof (mccontrol_in)) != 0)
716 			return (EFAULT);
717 
718 		mutex_enter(&mcdatamutex);
719 		if ((mcport = mc_node_get(mccontrol_in.id,
720 		    mctrl_head)) == NULL) {
721 			mutex_exit(&mcdatamutex);
722 			return (EINVAL);
723 		}
724 
725 		/*
726 		 * mcport->ndevgrps zero means Memory Controller is disable.
727 		 */
728 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
729 		    (mcport->ndevgrps == 0)) {
730 			mccontrol_in.ndevgrps = mcport->ndevgrps;
731 			mutex_exit(&mcdatamutex);
732 			if (copyout(&mccontrol_in, (void *)arg,
733 			    sizeof (mccontrol_in)))
734 				status = EFAULT;
735 			else if (mcport->ndevgrps != 0)
736 				status = EINVAL;
737 
738 			return (status);
739 		}
740 
741 		size = sizeof (*mccontrol) + (mcport->ndevgrps - 1) *
742 		    sizeof (mccontrol->devgrpids[0]);
743 		mccontrol = kmem_zalloc(size, KM_SLEEP);
744 
745 		mccontrol->id = mcport->mctrl_node.id;
746 		mccontrol->ndevgrps = mcport->ndevgrps;
747 		for (i = 0; i < mcport->ndevgrps; i++) {
748 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
749 			mccontrol->devgrpids[i].localid =
750 			    mcport->devgrpids[i] % NDGRPS_PER_MC;
751 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %d\n",
752 			    i));
753 		}
754 		mutex_exit(&mcdatamutex);
755 
756 		if (copyout(mccontrol, (void *)arg, size))
757 			status = EFAULT;
758 
759 		kmem_free(mccontrol, size);
760 		return (status);
761 
762 	/*
763 	 * input:id
764 	 *
765 	 * return    0: CPU flushed successfully.
766 	 *	EINVAL: the id wasn't found
767 	 */
768 	case MCIOC_ECFLUSH:
769 		mutex_enter(&cpu_lock);
770 		cpu = cpu_get((processorid_t)arg);
771 		mutex_exit(&cpu_lock);
772 		if (cpu == NULL)
773 			return (EINVAL);
774 
775 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
776 
777 		return (0);
778 
779 	default:
780 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
781 		return (EFAULT);
782 	}
783 }
784 
785 /*
786  * Gets the reg property from the memory node. This provides the various
787  * memory segments, at bank-boundries, dimm-pair boundries, in the form
788  * of [base, size] pairs. Continuous segments, spanning boundries are
789  * merged into one.
790  * Returns 0 for success and -1 for failure.
791  */
792 static int
793 mc_get_memory_reg_info(struct mc_soft_state *softsp)
794 {
795 	dev_info_t *devi;
796 	int len;
797 	int i;
798 	struct memory_reg_info *mregi;
799 
800 	_NOTE(ARGUNUSED(softsp))
801 
802 	if ((devi = ddi_find_devinfo("memory", -1, 0)) == NULL) {
803 		DPRINTF(MC_REG_DEBUG,
804 		    ("mc-us3i: cannot find memory node under root\n"));
805 		return (-1);
806 	}
807 
808 	if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
809 	    "reg", (caddr_t)&reg_info, &len) != DDI_PROP_SUCCESS) {
810 		DPRINTF(MC_REG_DEBUG,
811 		    ("mc-us3i: reg undefined under memory\n"));
812 		return (-1);
813 	}
814 
815 	nregs = len/sizeof (*mregi);
816 
817 	DPRINTF(MC_REG_DEBUG, ("mc_get_memory_reg_info: nregs %d"
818 	    "reg_info %p\n", nregs, (void *) reg_info));
819 
820 	mregi = reg_info;
821 
822 	/* debug printfs  */
823 	for (i = 0; i < nregs; i++) {
824 		DPRINTF(MC_REG_DEBUG, (" [0x%lx, 0x%lx] ",
825 		    mregi->base, mregi->size));
826 		mregi++;
827 	}
828 
829 	return (0);
830 }
831 
832 /*
833  * Initialize a logical bank
834  */
835 static struct bank_info *
836 mc_add_bank(int bankid, uint64_t mask, uint64_t match, uint64_t size,
837     int dgrpid)
838 {
839 	struct bank_info *banki;
840 
841 	if ((banki = mc_node_get(bankid, bank_head)) != NULL) {
842 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: bank %d exists\n",
843 		    bankid));
844 		return (banki);
845 	}
846 
847 	banki = kmem_zalloc(sizeof (*banki), KM_SLEEP);
848 
849 	banki->bank_node.id = bankid;
850 	banki->devgrp_id = dgrpid;
851 	banki->mask = mask;
852 	banki->match = match;
853 	banki->base = match;
854 	banki->size = size;
855 
856 	mc_node_add((mc_dlist_t *)banki, &bank_head, &bank_tail);
857 
858 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: id %d mask 0x%lx match 0x%lx"
859 	    " base 0x%lx size 0x%lx\n", bankid, mask, match,
860 	    banki->base, banki->size));
861 
862 	return (banki);
863 }
864 
865 /*
866  * Use the bank's base address to find out whether to initialize a new segment,
867  * or weave the bank into an existing segment. If the tail bank of a previous
868  * segment is not continuous with the new bank, the new bank goes into a new
869  * segment.
870  */
871 static void
872 mc_add_segment(struct bank_info *banki)
873 {
874 	struct seg_info *segi;
875 	struct bank_info *tb;
876 
877 	/* does this bank start a new segment? */
878 	if ((segi = mc_node_get(seg_id, seg_head)) == NULL) {
879 		/* this should happen for the first segment only */
880 		goto new_seg;
881 	}
882 
883 	tb = segi->tail;
884 	/* discontiguous banks go into a new segment, increment the seg_id */
885 	if (banki->base > (tb->base + tb->size)) {
886 		seg_id++;
887 		goto new_seg;
888 	}
889 
890 	/* weave the bank into the segment */
891 	segi->nbanks++;
892 	tb->next = banki;
893 
894 	banki->seg_id = segi->seg_node.id;
895 	banki->local_id = tb->local_id + 1;
896 
897 	/* contiguous or interleaved? */
898 	if (banki->base != (tb->base + tb->size))
899 		segi->ifactor++;
900 
901 	segi->size += banki->size;
902 	segi->tail = banki;
903 
904 	memsize += banki->size;
905 
906 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d add bank: id %d"
907 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
908 	    banki->size));
909 
910 	return;
911 
912 new_seg:
913 	segi = kmem_zalloc(sizeof (*segi), KM_SLEEP);
914 
915 	segi->seg_node.id = seg_id;
916 	segi->nbanks = 1;
917 	segi->ifactor = 1;
918 	segi->base = banki->base;
919 	segi->size = banki->size;
920 	segi->head = banki;
921 	segi->tail = banki;
922 
923 	banki->seg_id = segi->seg_node.id;
924 	banki->local_id = 0;
925 
926 	mc_node_add((mc_dlist_t *)segi, &seg_head, &seg_tail);
927 	nsegments++;
928 
929 	memsize += banki->size;
930 
931 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d new bank: id %d"
932 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
933 	    banki->size));
934 }
935 
936 /*
937  * Returns the address bit number (row index) that controls the logical/external
938  * bank assignment in interleave of kind internal-external same dimm-pair,
939  * internal-external both dimm-pair. This is done by using the dimm-densities
940  * and part-type.
941  */
942 static int
943 get_row_shift(int row_index, struct dgrp_info *dgrp)
944 {
945 	int shift;
946 
947 	switch (dgrp->base_device) {
948 	case BASE_DEVICE_128Mb:
949 	case BASE_DEVICE_256Mb:
950 		/* 128Mb and 256Mb devices have same bank select mask */
951 		shift = ADDR_GEN_128Mb_X8_ROW_0;
952 		break;
953 	case BASE_DEVICE_512Mb:
954 	case BASE_DEVICE_1Gb:
955 		/* 512 and 1Gb devices have same bank select mask */
956 		shift = ADDR_GEN_512Mb_X8_ROW_0;
957 		break;
958 	}
959 
960 	if (dgrp->part_type == PART_TYPE_X4)
961 		shift += 1;
962 
963 	shift += row_index;
964 
965 	return (shift);
966 }
967 
968 
969 static void
970 get_device_select(int interleave, struct dgrp_info *dgrp,
971     int *ds_shift, int *bs_shift)
972 {
973 
974 	switch (interleave) {
975 	case INTERLEAVE_DISABLE:
976 	/* Fall Through */
977 	case INTERLEAVE_INTERNAL:
978 		/* Bit 33 selects the dimm group/pair */
979 		*ds_shift = DIMM_PAIR_SELECT_SHIFT;
980 		if (dgrp->nlogbanks == 2) {
981 			/* Bit 32 selects the logical bank */
982 			*bs_shift = LOG_BANK_SELECT_SHIFT;
983 		}
984 		break;
985 	case INTERLEAVE_INTEXT_SAME_DIMM_PAIR:
986 		/* Bit 33 selects the dimm group/pair */
987 		*ds_shift =  DIMM_PAIR_SELECT_SHIFT;
988 		if (dgrp->nlogbanks == 2) {
989 			/* Row[2] selects the logical bank */
990 			*bs_shift = get_row_shift(2, dgrp);
991 		}
992 		break;
993 	case INTERLEAVE_INTEXT_BOTH_DIMM_PAIR:
994 		if (dgrp->nlogbanks == 2) {
995 			/* Row[3] selects the dimm group/pair */
996 			*ds_shift = get_row_shift(3, dgrp);
997 
998 			/* Row[2] selects the logical bank */
999 			*bs_shift = get_row_shift(2, dgrp);
1000 		} else {
1001 			/* Row[2] selects the dimm group/pair */
1002 			*ds_shift = get_row_shift(2, dgrp);
1003 		}
1004 		break;
1005 	}
1006 }
1007 
1008 static void
1009 mc_add_xor_banks(struct mctrl_info *mctrl,
1010     uint64_t mask, uint64_t match, int interleave)
1011 {
1012 	int i, j, nbits, nbanks;
1013 	int bankid;
1014 	int dselect[4];
1015 	int ds_shift = -1, bs_shift = -1;
1016 	uint64_t id, size, xmatch;
1017 	struct bank_info *banki;
1018 	struct dgrp_info *dgrp;
1019 
1020 	/* xor mode - assume 2 identical dimm-pairs */
1021 	if ((dgrp = mc_node_get(mctrl->devgrpids[0], dgrp_head)) == NULL) {
1022 		return;
1023 	}
1024 
1025 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1026 
1027 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1028 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1029 
1030 	/* xor enable means, bit 21 is used for dimm-pair select */
1031 	mask |= XOR_DEVICE_SELECT_MASK;
1032 	if (dgrp->nlogbanks == NLOGBANKS_PER_DGRP) {
1033 		/* bit 20 is used for logbank select */
1034 		mask |= XOR_BANK_SELECT_MASK;
1035 	}
1036 
1037 	/* find out the bits set to 1 in mask, nbits can be 2 or 4 */
1038 	nbits = 0;
1039 	for (i = 0; i <= DIMM_PAIR_SELECT_SHIFT; i++) {
1040 		if ((((mask >> i) & 1) == 1) && (nbits < 4)) {
1041 			dselect[nbits] = i;
1042 			nbits++;
1043 		}
1044 	}
1045 
1046 	/* number or banks can be 4 or 16 */
1047 	nbanks = 1 << nbits;
1048 
1049 	size = (dgrp->size * 2)/nbanks;
1050 
1051 	bankid = mctrl->mctrl_node.id * NLOGBANKS_PER_MC;
1052 
1053 	/* each bit position of the mask decides the match & base for bank */
1054 	for (i = 0; i < nbanks; i++) {
1055 		xmatch = 0;
1056 		for (j = 0; j < nbits; j++) {
1057 			xmatch |= (i & (1ULL << j)) << (dselect[j] - j);
1058 		}
1059 		/* xor ds bits to get the dimm-pair */
1060 		id = ((xmatch & (1ULL << ds_shift)) >> ds_shift) ^
1061 		    ((xmatch & (1ULL << XOR_DEVICE_SELECT_SHIFT)) >>
1062 		    XOR_DEVICE_SELECT_SHIFT);
1063 		banki = mc_add_bank(bankid, mask, match | xmatch, size,
1064 		    mctrl->devgrpids[id]);
1065 		mc_add_segment(banki);
1066 		bankid++;
1067 	}
1068 }
1069 
1070 /*
1071  * Based on interleave, dimm-densities, part-type determine the mask
1072  * and match per bank, construct the logical layout by adding segments
1073  * and banks
1074  */
1075 static int
1076 mc_add_dgrp_banks(uint64_t bankid, uint64_t dgrpid,
1077     uint64_t mask, uint64_t match, int interleave)
1078 {
1079 	int nbanks = 0;
1080 	struct bank_info *banki;
1081 	struct dgrp_info *dgrp;
1082 	int ds_shift = -1, bs_shift = -1;
1083 	uint64_t size;
1084 	uint64_t match_save;
1085 
1086 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) == NULL) {
1087 		return (0);
1088 	}
1089 
1090 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1091 
1092 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1093 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1094 	match |= (ds_shift == -1 ? 0 : ((dgrpid & 1) << ds_shift));
1095 	match_save = match;
1096 	size = dgrp->size/dgrp->nlogbanks;
1097 
1098 	/* for bankid 0, 2, 4 .. */
1099 	match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1100 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1101 	    " mask 0x%lx bs_shift %d match 0x%lx\n",
1102 	    interleave, mask, bs_shift, match));
1103 	banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1104 	nbanks++;
1105 	mc_add_segment(banki);
1106 
1107 	if (dgrp->nlogbanks == 2) {
1108 		/*
1109 		 * Set match value to original before adding second
1110 		 * logical bank interleaving information.
1111 		 */
1112 		match = match_save;
1113 		bankid++;
1114 		match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1115 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1116 		    " mask 0x%lx shift %d match 0x%lx\n",
1117 		    interleave, mask, bs_shift, match));
1118 		banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1119 		nbanks++;
1120 		mc_add_segment(banki);
1121 	}
1122 
1123 	return (nbanks);
1124 }
1125 
1126 /*
1127  * Construct the logical layout
1128  */
1129 static void
1130 mc_logical_layout(struct mctrl_info *mctrl, struct mc_soft_state *softsp)
1131 {
1132 	int i;
1133 	uint64_t mcid, bankid, interleave, mask, match;
1134 
1135 	if (mctrl->ndevgrps == 0)
1136 		return;
1137 
1138 	mcid = mctrl->mctrl_node.id;
1139 	mask = MC_SELECT_MASK;
1140 	match = mcid << MC_SELECT_SHIFT;
1141 
1142 	interleave = (softsp->mcreg1 & MCREG1_INTERLEAVE_MASK) >>
1143 	    MCREG1_INTERLEAVE_SHIFT;
1144 
1145 	/* Two dimm pairs and xor bit set */
1146 	if (mctrl->ndevgrps == NDGRPS_PER_MC &&
1147 	    (softsp->mcreg1 & MCREG1_XOR_ENABLE)) {
1148 		mc_add_xor_banks(mctrl, mask, match, interleave);
1149 		return;
1150 	}
1151 
1152 	/*
1153 	 * For xor bit unset or only one dimm pair.
1154 	 * In one dimm pair case, even if xor bit is set, xor
1155 	 * interleaving is only taking place in dimm's internal
1156 	 * banks. Dimm and external bank select bits are the
1157 	 * same as those without xor bit set.
1158 	 */
1159 	bankid = mcid * NLOGBANKS_PER_MC;
1160 	for (i = 0; i < mctrl->ndevgrps; i++) {
1161 		bankid += mc_add_dgrp_banks(bankid, mctrl->devgrpids[i],
1162 		    mask, match, interleave);
1163 	}
1164 }
1165 
1166 /*
1167  * Get the dimm-pair's size from the reg_info
1168  */
1169 static uint64_t
1170 get_devgrp_size(uint64_t start)
1171 {
1172 	int i;
1173 	uint64_t size;
1174 	uint64_t end, reg_start, reg_end;
1175 	struct memory_reg_info *regi;
1176 
1177 	/* dgrp end address */
1178 	end = start + DGRP_SIZE_MAX - 1;
1179 
1180 	regi = reg_info;
1181 	size = 0;
1182 	for (i = 0; i < nregs; i++) {
1183 		reg_start = regi->base;
1184 		reg_end = regi->base + regi->size - 1;
1185 
1186 		/* completely outside */
1187 		if ((reg_end < start) || (reg_start > end)) {
1188 			regi++;
1189 			continue;
1190 		}
1191 
1192 		/* completely inside */
1193 		if ((reg_start <= start) && (reg_end >= end)) {
1194 			return (DGRP_SIZE_MAX);
1195 		}
1196 
1197 		/* start is inside, but not the end, get the remainder */
1198 		if (reg_start < start) {
1199 			size = regi->size - (start - reg_start);
1200 			regi++;
1201 			continue;
1202 		}
1203 
1204 		/* add up size for all within range */
1205 		size += regi->size;
1206 		regi++;
1207 	}
1208 
1209 	return (size);
1210 }
1211 
1212 /*
1213  * Each device group is a pair (dimm-pair) of identical single/dual dimms.
1214  * Determine the dimm-pair's dimm-densities and part-type using the MCR-I.
1215  */
1216 static void
1217 mc_add_devgrp(int dgrpid, struct mc_soft_state *softsp)
1218 {
1219 	int i, mcid, devid, dgrpoffset;
1220 	struct dgrp_info *dgrp;
1221 	struct device_info *dev;
1222 	struct dimm_info *dimmp = (struct dimm_info *)softsp->memlayoutp;
1223 
1224 	mcid = softsp->portid;
1225 
1226 	/* add the entry on dgrp_info list */
1227 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) != NULL) {
1228 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: devgrp %d exists\n",
1229 		    dgrpid));
1230 		return;
1231 	}
1232 
1233 	dgrp = kmem_zalloc(sizeof (*dgrp), KM_SLEEP);
1234 
1235 	dgrp->dgrp_node.id = dgrpid;
1236 
1237 	/* a devgrp has identical (type & size) pair */
1238 	if ((dgrpid & 1) == 0) {
1239 		/* dimm-pair 0, 2, 4, 6 */
1240 		if (softsp->mcreg1 & MCREG1_DIMM1_BANK1)
1241 			dgrp->nlogbanks = 2;
1242 		else
1243 			dgrp->nlogbanks = 1;
1244 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN1_MASK) >>
1245 		    MCREG1_ADDRGEN1_SHIFT;
1246 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM1_MASK) >>
1247 		    MCREG1_X4DIMM1_SHIFT;
1248 	} else {
1249 		/* dimm-pair 1, 3, 5, 7 */
1250 		if (softsp->mcreg1 & MCREG1_DIMM2_BANK3)
1251 			dgrp->nlogbanks = 2;
1252 		else
1253 			dgrp->nlogbanks = 1;
1254 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN2_MASK) >>
1255 		    MCREG1_ADDRGEN2_SHIFT;
1256 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM2_MASK) >>
1257 		    MCREG1_X4DIMM2_SHIFT;
1258 	}
1259 
1260 	dgrp->base = MC_BASE(mcid) + DGRP_BASE(dgrpid);
1261 	dgrp->size = get_devgrp_size(dgrp->base);
1262 
1263 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: id %d size %ld logbanks %d"
1264 	    " base_device %d part_type %d\n", dgrpid, dgrp->size,
1265 	    dgrp->nlogbanks, dgrp->base_device, dgrp->part_type));
1266 
1267 	dgrpoffset = dgrpid % NDGRPS_PER_MC;
1268 	dgrp->ndevices = NDIMMS_PER_DGRP;
1269 	/* add the entry for the (identical) pair of dimms/device */
1270 	for (i = 0; i < NDIMMS_PER_DGRP; i++) {
1271 		devid = dgrpid * NDIMMS_PER_DGRP + i;
1272 		dgrp->deviceids[i] = devid;
1273 
1274 		if ((dev = mc_node_get(devid, device_head)) != NULL) {
1275 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: device %d "
1276 			    "exists\n", devid));
1277 			continue;
1278 		}
1279 
1280 		dev = kmem_zalloc(sizeof (*dev), KM_SLEEP);
1281 
1282 		dev->dev_node.id = devid;
1283 
1284 		dev->size = dgrp->size/2;
1285 
1286 		if (dimmp) {
1287 			(void) strncpy(dev->label, (char *)dimmp->label[
1288 			    i + NDIMMS_PER_DGRP * dgrpoffset],
1289 			    MAX_DEVLEN);
1290 
1291 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: dimm %d %s\n",
1292 			    dev->dev_node.id, dev->label));
1293 		}
1294 
1295 		mc_node_add((mc_dlist_t *)dev, &device_head, &device_tail);
1296 	}
1297 
1298 	mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1299 }
1300 
1301 /*
1302  * Construct the physical and logical layout
1303  */
1304 static void
1305 mc_construct(struct mc_soft_state *softsp)
1306 {
1307 	int i, mcid, dgrpid;
1308 	struct mctrl_info *mctrl;
1309 
1310 	mcid = softsp->portid;
1311 
1312 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mcid %d, mcreg1 0x%lx\n",
1313 	    mcid, softsp->mcreg1));
1314 
1315 	/*
1316 	 * Construct the Physical & Logical Layout
1317 	 */
1318 	mutex_enter(&mcdatamutex);
1319 
1320 	/* allocate for mctrl_info */
1321 	if ((mctrl = mc_node_get(mcid, mctrl_head)) != NULL) {
1322 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mctrl %d exists\n",
1323 		    mcid));
1324 		mutex_exit(&mcdatamutex);
1325 		return;
1326 	}
1327 
1328 	mctrl = kmem_zalloc(sizeof (*mctrl), KM_SLEEP);
1329 
1330 	mctrl->mctrl_node.id = mcid;
1331 
1332 	i = 0;
1333 	dgrpid = mcid * NDGRPS_PER_MC;
1334 	if (softsp->mcreg1 & MCREG1_DIMM1_BANK0) {
1335 		mc_add_devgrp(dgrpid, softsp);
1336 		mctrl->devgrpids[i] = dgrpid;
1337 		mctrl->ndevgrps++;
1338 		i++;
1339 	}
1340 
1341 	if (softsp->mcreg1 & MCREG1_DIMM2_BANK2) {
1342 		dgrpid++;
1343 		mc_add_devgrp(dgrpid, softsp);
1344 		mctrl->devgrpids[i] = dgrpid;
1345 		mctrl->ndevgrps++;
1346 	}
1347 
1348 	mc_logical_layout(mctrl, softsp);
1349 
1350 	mctrl->dimminfop = (struct dimm_info *)softsp->memlayoutp;
1351 
1352 	nmcs++;
1353 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1354 
1355 	mutex_exit(&mcdatamutex);
1356 
1357 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: nmcs %d memsize %ld"
1358 	    "nsegments %d\n", nmcs, memsize, nsegments));
1359 }
1360 
1361 /*
1362  * Delete nodes related to the given MC on mc, device group, device,
1363  * and bank lists. Moreover, delete corresponding segment if its connected
1364  * banks are all removed.
1365  */
1366 static void
1367 mc_delete(int mc_id)
1368 {
1369 	int i, j, dgrpid, devid, bankid;
1370 	struct mctrl_info *mctrl;
1371 	struct dgrp_info *dgrp;
1372 	struct device_info *devp;
1373 	struct seg_info *segi;
1374 	struct bank_info *banki;
1375 
1376 	mutex_enter(&mcdatamutex);
1377 
1378 	/* delete mctrl_info */
1379 	if ((mctrl = mc_node_get(mc_id, mctrl_head)) != NULL) {
1380 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1381 		kmem_free(mctrl, sizeof (*mctrl));
1382 		nmcs--;
1383 	} else
1384 		DPRINTF(MC_DESTRC_DEBUG, ("mc_delete: mctrl is not found\n"));
1385 
1386 	/* delete device groups and devices of the detached MC */
1387 	for (i = 0; i < NDGRPS_PER_MC; i++) {
1388 		dgrpid = mc_id * NDGRPS_PER_MC + i;
1389 		if (!(dgrp = mc_node_get(dgrpid, dgrp_head))) {
1390 			continue;
1391 		}
1392 
1393 		for (j = 0; j < NDIMMS_PER_DGRP; j++) {
1394 			devid = dgrpid * NDIMMS_PER_DGRP + j;
1395 			if (devp = mc_node_get(devid, device_head)) {
1396 				mc_node_del((mc_dlist_t *)devp,
1397 				    &device_head, &device_tail);
1398 				kmem_free(devp, sizeof (*devp));
1399 			} else
1400 				DPRINTF(MC_DESTRC_DEBUG,
1401 				    ("mc_delete: no dev %d\n", devid));
1402 		}
1403 
1404 		mc_node_del((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1405 		kmem_free(dgrp, sizeof (*dgrp));
1406 	}
1407 
1408 	/* delete all banks and associated segments */
1409 	for (i = 0; i < NLOGBANKS_PER_MC; i++) {
1410 		bankid = mc_id * NLOGBANKS_PER_MC + i;
1411 		if (!(banki = mc_node_get(bankid, bank_head))) {
1412 			continue;
1413 		}
1414 
1415 		/* bank and segments go together */
1416 		if ((segi = mc_node_get(banki->seg_id, seg_head)) != NULL) {
1417 			mc_node_del((mc_dlist_t *)segi, &seg_head, &seg_tail);
1418 			kmem_free(segi, sizeof (*segi));
1419 			nsegments--;
1420 		}
1421 
1422 		mc_node_del((mc_dlist_t *)banki, &bank_head, &bank_tail);
1423 		kmem_free(banki, sizeof (*banki));
1424 	}
1425 
1426 	mutex_exit(&mcdatamutex);
1427 }
1428 
1429 /*
1430  * mc_dlist is a double linking list, including unique id, and pointers to
1431  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1432  * and mctrl_info has it at the top to share the operations, add, del, and get.
1433  *
1434  * The new node is added at the tail and is not sorted.
1435  *
1436  * Input: The pointer of node to be added, head and tail of the list
1437  */
1438 
1439 static void
1440 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1441 {
1442 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1443 	    node->id, (void *) *head, (void *) *tail));
1444 
1445 	if (*head != NULL) {
1446 		node->prev = *tail;
1447 		node->next = (*tail)->next;
1448 		(*tail)->next = node;
1449 		*tail = node;
1450 	} else {
1451 		node->next = node->prev = NULL;
1452 		*head = *tail = node;
1453 	}
1454 }
1455 
1456 /*
1457  * Input: The pointer of node to be deleted, head and tail of the list
1458  *
1459  * Deleted node will be at the following positions
1460  * 1. At the tail of the list
1461  * 2. At the head of the list
1462  * 3. At the head and tail of the list, i.e. only one left.
1463  * 4. At the middle of the list
1464  */
1465 
1466 static void
1467 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1468 {
1469 	if (node->next == NULL) {
1470 		/* deleted node is at the tail of list */
1471 		*tail = node->prev;
1472 	} else {
1473 		node->next->prev = node->prev;
1474 	}
1475 
1476 	if (node->prev == NULL) {
1477 		/* deleted node is at the head of list */
1478 		*head = node->next;
1479 	} else {
1480 		node->prev->next = node->next;
1481 	}
1482 }
1483 
1484 /*
1485  * Search the list from the head of the list to match the given id
1486  * Input: id and the head of the list
1487  * Return: pointer of found node
1488  */
1489 static void *
1490 mc_node_get(int id, mc_dlist_t *head)
1491 {
1492 	mc_dlist_t *node;
1493 
1494 	node = head;
1495 	while (node != NULL) {
1496 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1497 		    node->id, id));
1498 		if (node->id == id)
1499 			break;
1500 		node = node->next;
1501 	}
1502 	return (node);
1503 }
1504 
/*
 * The memory subsystem provides a 144-bit interface (128 data bits, 9 ECC
 * bits and 7 unused bits) via a pair of DIMMs.  The mapping of each
 * Data/ECC bit to a specific DIMM pin is described by the memory-layout
 * property through two tables: the dimm table and the pin table.
 *
 * The memory-layout property arranges the data/ecc bits in this order:
 *
 *   Bit#  143                          16 15       7 6           0
 *        |      Data[127:0]              | ECC[8:0] | Unused[6:0] |
 *
 * dimm table: 1 bit stores the DIMM number (2 possible DIMMs) for each
 *	Data/ECC bit, so the table needs 18 bytes (144/8) to cover all
 *	bits.  Information is stored in big endian order, i.e.
 *	dimm_table[0] holds the information for logical bits 143 to 136.
 *
 * pin table: 1 byte stores the pin position for each Data/ECC bit, so the
 *	table is 144 bytes long.  Information is stored in little endian
 *	order, i.e. pin_table[0] holds the pin number of logical bit 0 and
 *	pin_table[143] holds the pin number of logical bit 143 (i.e. data
 *	bit# 127).
 *
 * The qwordmap table below maps the mc_get_mem_unum "synd_code" value to
 * the logical bit position assigned above by the memory-layout property.
 */

#define	QWORD_SIZE	144
static uint8_t qwordmap[QWORD_SIZE] =
{
	/* synd_code 0..127 -> logical bits 16..143 */
	16,  17,  18,  19,  20,  21,  22,  23,
	24,  25,  26,  27,  28,  29,  30,  31,
	32,  33,  34,  35,  36,  37,  38,  39,
	40,  41,  42,  43,  44,  45,  46,  47,
	48,  49,  50,  51,  52,  53,  54,  55,
	56,  57,  58,  59,  60,  61,  62,  63,
	64,  65,  66,  67,  68,  69,  70,  71,
	72,  73,  74,  75,  76,  77,  78,  79,
	80,  81,  82,  83,  84,  85,  86,  87,
	88,  89,  90,  91,  92,  93,  94,  95,
	96,  97,  98,  99,  100, 101, 102, 103,
	104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	/* synd_code 128..136 -> logical bits 7..15 */
	7,   8,   9,   10,  11,  12,  13,  14,
	15,
	/* synd_code 137..143 -> logical bits 4, 5, 6, 0, 1, 2, 3 */
	4,   5,   6,   0,   1,   2,   3
};
1545 
1546 
1547 /* ARGSUSED */
1548 static int
1549 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1550 {
1551 	int i;
1552 	int pos_cacheline, position, index, idx4dimm;
1553 	int qwlayout = synd_code;
1554 	short offset, data;
1555 	char unum[UNUM_NAMLEN];
1556 	struct dimm_info *dimmp;
1557 	struct pin_info *pinp;
1558 	struct bank_info *bank;
1559 	struct mctrl_info *mctrl;
1560 
1561 	/*
1562 	 * Enforce old Openboot requirement for synd code, either a single-bit
1563 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1564 	 */
1565 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1566 		return (EINVAL);
1567 
1568 	unum[0] = '\0';
1569 
1570 	DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:qwlayout %d phyaddr 0x%lx\n",
1571 	    qwlayout, paddr));
1572 
1573 	/*
1574 	 * Scan all logical banks to get one responding to the physical
1575 	 * address. Then compute the index to look up dimm and pin tables
1576 	 * to generate the unmuber.
1577 	 */
1578 	mutex_enter(&mcdatamutex);
1579 	bank = (struct bank_info *)bank_head;
1580 	while (bank != NULL) {
1581 		int mcid, mcdgrpid, dimmoffset;
1582 
1583 		/*
1584 		 * Physical Address is in a bank if (Addr & Mask) == Match
1585 		 */
1586 		if ((paddr & bank->mask) != bank->match) {
1587 			bank = (struct bank_info *)bank->bank_node.next;
1588 			continue;
1589 		}
1590 
1591 		mcid = bank->bank_node.id / NLOGBANKS_PER_MC;
1592 		mctrl = mc_node_get(mcid, mctrl_head);
1593 		ASSERT(mctrl != NULL);
1594 
1595 		DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:mc %d bank %d "
1596 		    "dgrp %d\n", mcid, bank->bank_node.id, bank->devgrp_id));
1597 
1598 		mcdgrpid = bank->devgrp_id % NDGRPS_PER_MC;
1599 		dimmoffset = mcdgrpid * NDIMMS_PER_DGRP;
1600 
1601 		dimmp = (struct dimm_info *)mctrl->dimminfop;
1602 		if (dimmp == NULL) {
1603 			mutex_exit(&mcdatamutex);
1604 			return (ENXIO);
1605 		}
1606 
1607 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1608 			/*
1609 			 * single-bit error handling, we can identify specific
1610 			 * DIMM.
1611 			 */
1612 
1613 			pinp = (struct pin_info *)&dimmp->data[0];
1614 
1615 			pos_cacheline = qwordmap[qwlayout];
1616 			position = 143 - pos_cacheline;
1617 			index = position / 8;
1618 			offset = 7 - (position % 8);
1619 
1620 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:position "
1621 			    "%d\n", position));
1622 			/*
1623 			 * Trade-off: We cound't add pin number to
1624 			 * unumber string because statistic number
1625 			 * pumps up at the corresponding dimm not pin.
1626 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1627 			 * pinp->pintable[pos_cacheline]);
1628 			 */
1629 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:pin number "
1630 			    "%1u\n", (uint_t)pinp->pintable[pos_cacheline]));
1631 			data = pinp->dimmtable[index];
1632 			idx4dimm = (data >> offset) & 1;
1633 
1634 			(void) strncpy(unum,
1635 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1636 			    UNUM_NAMLEN);
1637 
1638 			DPRINTF(MC_GUNUM_DEBUG,
1639 			    ("mc_get_mem_unum:unum %s\n", unum));
1640 
1641 			/*
1642 			 * platform hook for adding label information to unum.
1643 			 */
1644 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, idx4dimm);
1645 		} else {
1646 			char *p = unum;
1647 			size_t res = UNUM_NAMLEN;
1648 
1649 			/*
1650 			 * multi-bit error handling, we can only identify
1651 			 * bank of DIMMs.
1652 			 */
1653 
1654 			for (i = 0; (i < NDIMMS_PER_DGRP) && (res > 0); i++) {
1655 				(void) snprintf(p, res, "%s%s",
1656 				    i == 0 ? "" : " ",
1657 				    (char *)dimmp->label[dimmoffset + i]);
1658 				res -= strlen(p);
1659 				p += strlen(p);
1660 			}
1661 
1662 			/*
1663 			 * platform hook for adding label information
1664 			 * to unum.
1665 			 */
1666 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, -1);
1667 		}
1668 		mutex_exit(&mcdatamutex);
1669 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1670 		    (strlen(unum) >= buflen)) {
1671 			return (ENAMETOOLONG);
1672 		} else {
1673 			(void) strncpy(buf, unum, UNUM_NAMLEN);
1674 			*lenp = strlen(buf);
1675 			return (0);
1676 		}
1677 	}	/* end of while loop for logic bank list */
1678 
1679 	mutex_exit(&mcdatamutex);
1680 	return (ENXIO);
1681 }
1682 
1683 static int
1684 mc_get_mem_info(int synd_code, uint64_t paddr,
1685     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1686     int *segsp, int *banksp, int *mcidp)
1687 {
1688 	struct bank_info *bankp;
1689 
1690 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1691 		return (EINVAL);
1692 
1693 	/*
1694 	 * Scan all logical banks to get one responding to the physical
1695 	 * address. Then compute the index to look up dimm and pin tables
1696 	 * to generate the unmuber.
1697 	 */
1698 	mutex_enter(&mcdatamutex);
1699 	bankp = (struct bank_info *)bank_head;
1700 	while (bankp != NULL) {
1701 		struct seg_info *segp;
1702 		int mcid;
1703 
1704 		/*
1705 		 * Physical Address is in a bank if (Addr & Mask) == Match
1706 		 */
1707 		if ((paddr & bankp->mask) != bankp->match) {
1708 			bankp = (struct bank_info *)bankp->bank_node.next;
1709 			continue;
1710 		}
1711 
1712 		mcid = bankp->bank_node.id / NLOGBANKS_PER_MC;
1713 
1714 		/*
1715 		 * Get the corresponding segment.
1716 		 */
1717 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1718 		    seg_head)) == NULL) {
1719 			mutex_exit(&mcdatamutex);
1720 			return (EFAULT);
1721 		}
1722 
1723 		*mem_sizep = memsize;
1724 		*seg_sizep = segp->size;
1725 		*bank_sizep = bankp->size;
1726 		*segsp = nsegments;
1727 		*banksp = segp->nbanks;
1728 		*mcidp = mcid;
1729 
1730 		mutex_exit(&mcdatamutex);
1731 		return (0);
1732 
1733 	}	/* end of while loop for logic bank list */
1734 
1735 	mutex_exit(&mcdatamutex);
1736 	return (ENXIO);
1737 }
1738 /*
1739  * mc-us3i driver allows a platform to add extra label
1740  * information to the unum string. If a platform implements a
1741  * kernel function called plat_add_mem_unum_label() it will be
1742  * executed. This would typically be implemented in the platmod.
1743  */
1744 static void
1745 mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm)
1746 {
1747 	if (&plat_add_mem_unum_label)
1748 		plat_add_mem_unum_label(unum, mcid, bank, dimm);
1749 }
1750