xref: /titanic_44/usr/src/uts/sun4u/io/mc-us3i.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/conf.h>
31 #include <sys/ddi.h>
32 #include <sys/stat.h>
33 #include <sys/sunddi.h>
34 #include <sys/ddi_impldefs.h>
35 #include <sys/obpdefs.h>
36 #include <sys/cmn_err.h>
37 #include <sys/errno.h>
38 #include <sys/kmem.h>
39 #include <sys/open.h>
40 #include <sys/thread.h>
41 #include <sys/cpuvar.h>
42 #include <sys/x_call.h>
43 #include <sys/debug.h>
44 #include <sys/sysmacros.h>
45 #include <sys/ivintr.h>
46 #include <sys/intr.h>
47 #include <sys/intreg.h>
48 #include <sys/autoconf.h>
49 #include <sys/modctl.h>
50 #include <sys/spl.h>
51 #include <sys/async.h>
52 #include <sys/mc.h>
53 #include <sys/mc-us3i.h>
54 #include <sys/note.h>
55 #include <sys/cpu_module.h>
56 
/*
 * pm-hardware-state value
 *
 * mc_attach() publishes this string as the "pm-hardware-state" property
 * of the memory-controller node.
 */
#define	NO_SUSPEND_RESUME	"no-suspend-resume"
61 
62 /*
63  * Function prototypes
64  */
65 
/* Entry points referenced from the mc_cb_ops and mc_ops tables below. */
static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
71 
72 /*
73  * Configuration data structures
74  */
/*
 * Character/block entry point table.  Only open, close and ioctl are
 * implemented by this driver; every other slot is filled with one of the
 * nulldev/nodev/nochpoll stubs or with ddi_prop_op.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};
95 
/*
 * Device operations table.  attach and detach are the only driver-supplied
 * routines; getinfo is ddi_no_info, there are no bus_ops, and the remaining
 * slots are nulldev.  The table is handed to the module framework through
 * modldrv.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_no_info,			/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};
109 
110 /*
111  * Driver globals
112  */
/* Soft-state anchor; set up by ddi_soft_state_init() in _init() */
static void *mcp;
/* Number of memory controllers; reported as nmcs by MCIOC_MEMCONF */
static int nmcs = 0;
/* Id of the segment that mc_add_segment() is currently extending */
static int seg_id;
/* Number of segments; incremented in mc_add_segment() */
static int nsegments;
/* Running total of the sizes of all banks added by mc_add_segment() */
static uint64_t	memsize;

/* NOTE(review): presumably the mask consulted by DPRINTF -- confirm */
static uint_t	mc_debug = 0;

/* Non-zero once mc_get_memory_reg_info() has succeeded (see mc_attach()) */
static int getreg;
/* Number of entries in reg_info */
static int nregs;
/* "reg" property of the memory node, read by mc_get_memory_reg_info() */
struct memory_reg_info *reg_info;

/* Head and tail pointers of the driver's mc_dlist_t lists */
static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
static mc_dlist_t *device_head, *device_tail;

/* Taken by mc_attach(), mc_detach(), mc_open() and mc_close() */
static kmutex_t	mcmutex;
/* Taken by mc_ioctl() while it reads the lists and counters above */
static kmutex_t	mcdatamutex;
/* Set by mc_open(), cleared by mc_close(); a second open gets EBUSY */
static int mc_is_open = 0;
132 
extern struct mod_ops mod_driverops;

/* Loadable-driver description; ties the module to mc_ops. */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"Memory-controller: %I%",	/* module name */
	&mc_ops,			/* driver ops */
};

/* Linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
146 
/* Internal helpers */
static int mc_get_memory_reg_info(struct mc_soft_state *softsp);
static void mc_construct(struct mc_soft_state *softsp);
static void mc_delete(int mc_id);
static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void *mc_node_get(int id, mc_dlist_t *head);
static void mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm);
static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
    int buflen, int *lenp);
static int mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);

/*
 * Weak symbols: mc_attach() and mc_detach() test the address of
 * p2get_mem_unum and p2get_mem_info before assigning to them, so the
 * driver still works when they are not defined.
 */
#pragma weak p2get_mem_unum
#pragma weak p2get_mem_info
#pragma weak plat_add_mem_unum_label
163 
/*
 * For testing only.
 * The fields parallel the synd_code, paddr, buf and *lenp arguments of
 * mc_get_mem_unum().
 */
struct test_unum {
	int		synd_code;
	uint64_t	paddr;
	char 		unum[UNUM_NAMLEN];
	int		len;
};
171 
172 /*
173  * These are the module initialization routines.
174  */
175 
176 int
177 _init(void)
178 {
179 	int error;
180 
181 	if ((error = ddi_soft_state_init(&mcp,
182 	    sizeof (struct mc_soft_state), 1)) != 0)
183 		return (error);
184 
185 	error =  mod_install(&modlinkage);
186 	if (error == 0) {
187 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
188 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
189 	}
190 
191 	return (error);
192 }
193 
194 int
195 _fini(void)
196 {
197 	int error;
198 
199 	if ((error = mod_remove(&modlinkage)) != 0)
200 		return (error);
201 
202 	ddi_soft_state_fini(&mcp);
203 	mutex_destroy(&mcmutex);
204 	mutex_destroy(&mcdatamutex);
205 	return (0);
206 }
207 
208 int
209 _info(struct modinfo *modinfop)
210 {
211 	return (mod_info(&modlinkage, modinfop));
212 }
213 
214 static int
215 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
216 {
217 	struct mc_soft_state *softsp;
218 	struct dimm_info *dimminfop;
219 	int instance, len, err;
220 	int mcreg1_len;
221 
222 	switch (cmd) {
223 	case DDI_ATTACH:
224 		break;
225 
226 	case DDI_RESUME:
227 		return (DDI_SUCCESS);
228 
229 	default:
230 		return (DDI_FAILURE);
231 	}
232 
233 	instance = ddi_get_instance(devi);
234 
235 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
236 		return (DDI_FAILURE);
237 
238 	softsp = ddi_get_soft_state(mcp, instance);
239 
240 	/* Set the dip in the soft state */
241 	softsp->dip = devi;
242 
243 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
244 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
245 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property\n",
246 		    instance, "portid"));
247 		goto bad;
248 	}
249 
250 	DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: mc %d portid %d, cpuid %d\n",
251 	    instance, softsp->portid, CPU->cpu_id));
252 
253 	/* Get the content of Memory Control Register I from obp */
254 	mcreg1_len = sizeof (uint64_t);
255 	if ((ddi_getlongprop_buf(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
256 	    "memory-control-register-1", (caddr_t)&(softsp->mcreg1),
257 	    &mcreg1_len) == DDI_PROP_SUCCESS) &&
258 	    (mcreg1_len == sizeof (uint64_t))) {
259 		softsp->mcr_read_ok = 1;
260 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d from obp: Reg1: 0x%lx\n",
261 		instance, softsp->mcreg1));
262 	}
263 
264 	/* attach fails if mcreg1 cannot be accessed */
265 	if (!softsp->mcr_read_ok) {
266 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get mcreg1\n",
267 		    instance));
268 		goto bad;
269 	}
270 
271 	/* nothing to suspend/resume here */
272 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
273 	    "pm-hardware-state", NO_SUSPEND_RESUME,
274 	    sizeof (NO_SUSPEND_RESUME));
275 
276 	/*
277 	 * Get the label of dimms and pin routing information from the
278 	 * memory-layout property of the memory controller.
279 	 */
280 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
281 	    "memory-layout", (caddr_t)&dimminfop, &len);
282 	if (err == DDI_PROP_SUCCESS && dimminfop->table_width == 1) {
283 		/* Set the pointer and size of property in the soft state */
284 		softsp->memlayoutp = dimminfop;
285 		softsp->memlayoutlen = len;
286 	} else {
287 		/*
288 		 * memory-layout property was not found or some other
289 		 * error occured, plat_get_mem_unum() will not work
290 		 * for this mc.
291 		 */
292 		softsp->memlayoutp = NULL;
293 		softsp->memlayoutlen = 0;
294 		DPRINTF(MC_ATTACH_DEBUG,
295 		    ("mc %d: missing or unsupported memory-layout property\n",
296 		    instance));
297 	}
298 
299 	mutex_enter(&mcmutex);
300 
301 	/* Get the physical segments from memory/reg, just once for all MC */
302 	if (!getreg) {
303 		if (mc_get_memory_reg_info(softsp) != 0) {
304 			goto bad1;
305 		}
306 		getreg = 1;
307 	}
308 
309 	/* Construct the physical and logical layout of the MC */
310 	mc_construct(softsp);
311 
312 	if (nmcs == 1) {
313 		if (&p2get_mem_unum)
314 			p2get_mem_unum = mc_get_mem_unum;
315 		if (&p2get_mem_info)
316 			p2get_mem_info = mc_get_mem_info;
317 	}
318 
319 	if (ddi_create_minor_node(devi, "mc-us3i", S_IFCHR, instance,
320 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
321 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
322 		    " failed \n"));
323 		goto bad1;
324 	}
325 	mutex_exit(&mcmutex);
326 
327 	ddi_report_dev(devi);
328 	return (DDI_SUCCESS);
329 
330 bad1:
331 	/* release all allocated data struture for this MC */
332 	mc_delete(softsp->portid);
333 	mutex_exit(&mcmutex);
334 	if (softsp->memlayoutp != NULL)
335 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
336 
337 bad:
338 	cmn_err(CE_WARN, "mc-us3i: attach failed for instance %d\n", instance);
339 	ddi_soft_state_free(mcp, instance);
340 	return (DDI_FAILURE);
341 }
342 
343 /* ARGSUSED */
344 static int
345 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
346 {
347 	int instance;
348 	struct mc_soft_state *softsp;
349 
350 	/* get the instance of this devi */
351 	instance = ddi_get_instance(devi);
352 
353 	/* get the soft state pointer for this device node */
354 	softsp = ddi_get_soft_state(mcp, instance);
355 
356 	switch (cmd) {
357 	case DDI_SUSPEND:
358 		return (DDI_SUCCESS);
359 
360 	case DDI_DETACH:
361 		break;
362 
363 	default:
364 		return (DDI_FAILURE);
365 	}
366 
367 	DPRINTF(MC_DETACH_DEBUG, ("mc %d DETACH: portid %d\n", instance,
368 	    softsp->portid));
369 
370 	mutex_enter(&mcmutex);
371 
372 	/* release all allocated data struture for this MC */
373 	mc_delete(softsp->portid);
374 
375 	if (softsp->memlayoutp != NULL)
376 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
377 
378 	if (nmcs == 0) {
379 		if (&p2get_mem_unum)
380 			p2get_mem_unum = NULL;
381 		if (&p2get_mem_info)
382 			p2get_mem_info = NULL;
383 	}
384 
385 	mutex_exit(&mcmutex);
386 
387 	ddi_remove_minor_node(devi, NULL);
388 	/* free up the soft state */
389 	ddi_soft_state_free(mcp, instance);
390 
391 	return (DDI_SUCCESS);
392 }
393 
394 /* ARGSUSED */
395 static int
396 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
397 {
398 	int status = 0;
399 
400 	/* verify that otyp is appropriate */
401 	if (otyp != OTYP_CHR) {
402 		return (EINVAL);
403 	}
404 
405 	mutex_enter(&mcmutex);
406 	/* At least one attached? */
407 	if (nmcs == 0) {
408 		status = ENXIO;
409 		goto bad;
410 	}
411 
412 	if (mc_is_open) {
413 		status = EBUSY;
414 		goto bad;
415 	}
416 	mc_is_open = 1;
417 bad:
418 
419 	mutex_exit(&mcmutex);
420 	return (status);
421 }
422 
423 /* ARGSUSED */
424 static int
425 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
426 {
427 	mutex_enter(&mcmutex);
428 	mc_is_open = 0;
429 	mutex_exit(&mcmutex);
430 
431 	return (0);
432 }
433 
434 /*
435  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
436  * MCIOC_CTRLCONF, MCIOC_CONTROL.
437  *
438  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
439  * associated with various length struct. If given number is less than the
440  * number in kernel, update the number and return EINVAL so that user could
441  * allocate enough space for it.
442  *
443  */
444 
445 /* ARGSUSED */
446 static int
447 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
448 	int *rval_p)
449 {
450 	size_t	size;
451 	struct mc_memconf mcmconf;
452 	struct mc_memory *mcmem, mcmem_in;
453 	struct mc_segment *mcseg, mcseg_in;
454 	struct mc_bank mcbank;
455 	struct mc_devgrp mcdevgrp;
456 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
457 	struct mc_control *mccontrol, mccontrol_in;
458 	struct seg_info *seg = NULL;
459 	struct bank_info *bank = NULL;
460 	struct dgrp_info *dgrp = NULL;
461 	struct mctrl_info *mcport;
462 	mc_dlist_t *mctrl;
463 	int i, status = 0;
464 	cpu_t *cpu;
465 
466 	switch (cmd) {
467 	case MCIOC_MEMCONF:
468 		mutex_enter(&mcdatamutex);
469 
470 		mcmconf.nmcs = nmcs;
471 		mcmconf.nsegments = nsegments;
472 		mcmconf.nbanks = NLOGBANKS_PER_SEG;
473 		mcmconf.ndevgrps = NDGRPS_PER_MC;
474 		mcmconf.ndevs = NDIMMS_PER_DGRP;
475 		mcmconf.len_dev = MAX_DEVLEN;
476 		mcmconf.xfer_size = TRANSFER_SIZE;
477 
478 		mutex_exit(&mcdatamutex);
479 
480 		if (copyout(&mcmconf, (void *)arg, sizeof (mcmconf)))
481 			return (EFAULT);
482 		return (0);
483 
484 	/*
485 	 * input: nsegments and allocate space for various length of segmentids
486 	 *
487 	 * return    0: size, number of segments, and all segment ids,
488 	 *		where glocal and local ids are identical.
489 	 *	EINVAL: if the given nsegments is less than that in kernel and
490 	 *		nsegments of struct will be updated.
491 	 *	EFAULT: if other errors in kernel.
492 	 */
493 	case MCIOC_MEM:
494 		if (copyin((void *)arg, &mcmem_in, sizeof (mcmem_in)) != 0)
495 			return (EFAULT);
496 
497 		mutex_enter(&mcdatamutex);
498 		if (mcmem_in.nsegments < nsegments) {
499 			mcmem_in.nsegments = nsegments;
500 			mutex_exit(&mcdatamutex);
501 			if (copyout(&mcmem_in, (void *)arg, sizeof (mcmem_in)))
502 				status = EFAULT;
503 			else
504 				status = EINVAL;
505 
506 			return (status);
507 		}
508 
509 		size = sizeof (*mcmem) + (nsegments - 1) *
510 		    sizeof (mcmem->segmentids[0]);
511 		mcmem = kmem_zalloc(size, KM_SLEEP);
512 
513 		mcmem->size = memsize;
514 		mcmem->nsegments = nsegments;
515 		seg = (struct seg_info *)seg_head;
516 		for (i = 0; i < nsegments; i++) {
517 			ASSERT(seg != NULL);
518 			mcmem->segmentids[i].globalid = seg->seg_node.id;
519 			mcmem->segmentids[i].localid = seg->seg_node.id;
520 			seg = (struct seg_info *)seg->seg_node.next;
521 		}
522 		mutex_exit(&mcdatamutex);
523 
524 		if (copyout(mcmem, (void *)arg, size))
525 			status = EFAULT;
526 
527 		kmem_free(mcmem, size);
528 		return (status);
529 
530 	/*
531 	 * input: id, nbanks and allocate space for various length of bankids
532 	 *
533 	 * return    0: base, size, number of banks, and all bank ids,
534 	 *		where global id is unique of all banks and local id
535 	 *		is only unique for mc.
536 	 *	EINVAL: either id isn't found or if given nbanks is less than
537 	 *		that in kernel and nbanks of struct will be updated.
538 	 *	EFAULT: if other errors in kernel.
539 	 */
540 	case MCIOC_SEG:
541 
542 		if (copyin((void *)arg, &mcseg_in, sizeof (mcseg_in)) != 0)
543 			return (EFAULT);
544 
545 		mutex_enter(&mcdatamutex);
546 		if ((seg = mc_node_get(mcseg_in.id, seg_head)) == NULL) {
547 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
548 			    "id %d\n", mcseg_in.id));
549 			mutex_exit(&mcdatamutex);
550 			return (EFAULT);
551 		}
552 
553 		if (mcseg_in.nbanks < seg->nbanks) {
554 			mcseg_in.nbanks = seg->nbanks;
555 			mutex_exit(&mcdatamutex);
556 			if (copyout(&mcseg_in, (void *)arg, sizeof (mcseg_in)))
557 				status = EFAULT;
558 			else
559 				status = EINVAL;
560 
561 			return (status);
562 		}
563 
564 		size = sizeof (*mcseg) + (seg->nbanks - 1) *
565 		    sizeof (mcseg->bankids[0]);
566 		mcseg = kmem_zalloc(size, KM_SLEEP);
567 
568 		mcseg->id = seg->seg_node.id;
569 		mcseg->ifactor = seg->ifactor;
570 		mcseg->base = seg->base;
571 		mcseg->size = seg->size;
572 		mcseg->nbanks = seg->nbanks;
573 
574 		bank = seg->head;
575 
576 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg %p bank %p\n",
577 		    seg->nbanks, (void *) seg, (void *) bank));
578 
579 		i = 0;
580 		while (bank != NULL) {
581 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
582 			    i, bank->bank_node.id));
583 			mcseg->bankids[i].globalid = bank->bank_node.id;
584 			mcseg->bankids[i++].localid = bank->local_id;
585 			bank = bank->next;
586 		}
587 		ASSERT(i == seg->nbanks);
588 		mutex_exit(&mcdatamutex);
589 
590 		if (copyout(mcseg, (void *)arg, size))
591 			status = EFAULT;
592 
593 		kmem_free(mcseg, size);
594 		return (status);
595 
596 	/*
597 	 * input: id
598 	 *
599 	 * return    0: mask, match, size, and devgrpid,
600 	 *		where global id is unique of all devgrps and local id
601 	 *		is only unique for mc.
602 	 *	EINVAL: if id isn't found
603 	 *	EFAULT: if other errors in kernel.
604 	 */
605 	case MCIOC_BANK:
606 		if (copyin((void *)arg, &mcbank, sizeof (mcbank)) != 0)
607 			return (EFAULT);
608 
609 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
610 
611 		mutex_enter(&mcdatamutex);
612 
613 		if ((bank = mc_node_get(mcbank.id, bank_head)) == NULL) {
614 			mutex_exit(&mcdatamutex);
615 			return (EINVAL);
616 		}
617 
618 		mcbank.mask = bank->mask;
619 		mcbank.match = bank->match;
620 		mcbank.size = bank->size;
621 		mcbank.devgrpid.globalid = bank->devgrp_id;
622 		mcbank.devgrpid.localid =
623 		    bank->bank_node.id % NLOGBANKS_PER_SEG;
624 
625 		mutex_exit(&mcdatamutex);
626 
627 		if (copyout(&mcbank, (void *)arg, sizeof (mcbank)))
628 			return (EFAULT);
629 		return (0);
630 
631 	/*
632 	 * input:id and allocate space for various length of deviceids
633 	 *
634 	 * return    0: size and number of devices.
635 	 *	EINVAL: id isn't found
636 	 *	EFAULT: if other errors in kernel.
637 	 */
638 	case MCIOC_DEVGRP:
639 
640 		if (copyin((void *)arg, &mcdevgrp, sizeof (mcdevgrp)) != 0)
641 			return (EFAULT);
642 
643 		mutex_enter(&mcdatamutex);
644 		if ((dgrp = mc_node_get(mcdevgrp.id, dgrp_head)) == NULL) {
645 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
646 			    "%d\n", mcdevgrp.id));
647 			mutex_exit(&mcdatamutex);
648 			return (EINVAL);
649 		}
650 
651 		mcdevgrp.ndevices = dgrp->ndevices;
652 		mcdevgrp.size = dgrp->size;
653 
654 		mutex_exit(&mcdatamutex);
655 
656 		if (copyout(&mcdevgrp, (void *)arg, sizeof (mcdevgrp)))
657 			status = EFAULT;
658 
659 		return (status);
660 
661 	/*
662 	 * input: nmcs and allocate space for various length of mcids
663 	 *
664 	 * return    0: number of mc, and all mcids,
665 	 *		where glocal and local ids are identical.
666 	 *	EINVAL: if the given nmcs is less than that in kernel and
667 	 *		nmcs of struct will be updated.
668 	 *	EFAULT: if other errors in kernel.
669 	 */
670 	case MCIOC_CTRLCONF:
671 		if (copyin((void *)arg, &mcctrlconf_in,
672 		    sizeof (mcctrlconf_in)) != 0)
673 			return (EFAULT);
674 
675 		mutex_enter(&mcdatamutex);
676 		if (mcctrlconf_in.nmcs < nmcs) {
677 			mcctrlconf_in.nmcs = nmcs;
678 			mutex_exit(&mcdatamutex);
679 			if (copyout(&mcctrlconf_in, (void *)arg,
680 			    sizeof (mcctrlconf_in)))
681 				status = EFAULT;
682 			else
683 				status = EINVAL;
684 
685 			return (status);
686 		}
687 
688 		/*
689 		 * Cannot just use the size of the struct because of the various
690 		 * length struct
691 		 */
692 		size = sizeof (*mcctrlconf) + ((nmcs - 1) *
693 		    sizeof (mcctrlconf->mcids[0]));
694 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
695 
696 		mcctrlconf->nmcs = nmcs;
697 
698 		/* Get all MC ids and add to mcctrlconf */
699 		mctrl = mctrl_head;
700 		i = 0;
701 		while (mctrl != NULL) {
702 			mcctrlconf->mcids[i].globalid = mctrl->id;
703 			mcctrlconf->mcids[i].localid = mctrl->id;
704 			i++;
705 			mctrl = mctrl->next;
706 		}
707 		ASSERT(i == nmcs);
708 
709 		mutex_exit(&mcdatamutex);
710 
711 		if (copyout(mcctrlconf, (void *)arg, size))
712 			status = EFAULT;
713 
714 		kmem_free(mcctrlconf, size);
715 		return (status);
716 
717 	/*
718 	 * input:id, ndevgrps and allocate space for various length of devgrpids
719 	 *
720 	 * return    0: number of devgrp, and all devgrpids,
721 	 *		is unique of all devgrps and local id is only unique
722 	 *		for mc.
723 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
724 	 *		less than that in kernel and ndevgrps of struct will
725 	 *		be updated.
726 	 *	EFAULT: if other errors in kernel.
727 	 */
728 	case MCIOC_CONTROL:
729 		if (copyin((void *)arg, &mccontrol_in,
730 		    sizeof (mccontrol_in)) != 0)
731 			return (EFAULT);
732 
733 		mutex_enter(&mcdatamutex);
734 		if ((mcport = mc_node_get(mccontrol_in.id,
735 		    mctrl_head)) == NULL) {
736 			mutex_exit(&mcdatamutex);
737 			return (EINVAL);
738 		}
739 
740 		/*
741 		 * mcport->ndevgrps zero means Memory Controller is disable.
742 		 */
743 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
744 		    (mcport->ndevgrps == 0)) {
745 			mccontrol_in.ndevgrps = mcport->ndevgrps;
746 			mutex_exit(&mcdatamutex);
747 			if (copyout(&mccontrol_in, (void *)arg,
748 			    sizeof (mccontrol_in)))
749 				status = EFAULT;
750 			else if (mcport->ndevgrps != 0)
751 				status = EINVAL;
752 
753 			return (status);
754 		}
755 
756 		size = sizeof (*mccontrol) + (mcport->ndevgrps - 1) *
757 		    sizeof (mccontrol->devgrpids[0]);
758 		mccontrol = kmem_zalloc(size, KM_SLEEP);
759 
760 		mccontrol->id = mcport->mctrl_node.id;
761 		mccontrol->ndevgrps = mcport->ndevgrps;
762 		for (i = 0; i < mcport->ndevgrps; i++) {
763 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
764 			mccontrol->devgrpids[i].localid =
765 			    mcport->devgrpids[i] % NDGRPS_PER_MC;
766 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %d\n",
767 			    i));
768 		}
769 		mutex_exit(&mcdatamutex);
770 
771 		if (copyout(mccontrol, (void *)arg, size))
772 			status = EFAULT;
773 
774 		kmem_free(mccontrol, size);
775 		return (status);
776 
777 	/*
778 	 * input:id
779 	 *
780 	 * return    0: CPU flushed successfully.
781 	 *	EINVAL: the id wasn't found
782 	 */
783 	case MCIOC_ECFLUSH:
784 		mutex_enter(&cpu_lock);
785 		cpu = cpu_get((processorid_t)arg);
786 		mutex_exit(&cpu_lock);
787 		if (cpu == NULL)
788 			return (EINVAL);
789 
790 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
791 
792 		return (0);
793 
794 	default:
795 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
796 		return (EFAULT);
797 	}
798 }
799 
800 /*
801  * Gets the reg property from the memory node. This provides the various
802  * memory segments, at bank-boundries, dimm-pair boundries, in the form
803  * of [base, size] pairs. Continuous segments, spanning boundries are
804  * merged into one.
805  * Returns 0 for success and -1 for failure.
806  */
807 static int
808 mc_get_memory_reg_info(struct mc_soft_state *softsp)
809 {
810 	dev_info_t *devi;
811 	int len;
812 	int i;
813 	struct memory_reg_info *mregi;
814 
815 	_NOTE(ARGUNUSED(softsp))
816 
817 	if ((devi = ddi_find_devinfo("memory", -1, 0)) == NULL) {
818 		DPRINTF(MC_REG_DEBUG,
819 		    ("mc-us3i: cannot find memory node under root\n"));
820 		return (-1);
821 	}
822 
823 	if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
824 	    "reg", (caddr_t)&reg_info, &len) != DDI_PROP_SUCCESS) {
825 		DPRINTF(MC_REG_DEBUG,
826 		    ("mc-us3i: reg undefined under memory\n"));
827 		return (-1);
828 	}
829 
830 	nregs = len/sizeof (*mregi);
831 
832 	DPRINTF(MC_REG_DEBUG, ("mc_get_memory_reg_info: nregs %d"
833 	    "reg_info %p\n", nregs, (void *) reg_info));
834 
835 	mregi = reg_info;
836 
837 	/* debug printfs  */
838 	for (i = 0; i < nregs; i++) {
839 		DPRINTF(MC_REG_DEBUG, (" [0x%lx, 0x%lx] ",
840 		    mregi->base, mregi->size));
841 		mregi++;
842 	}
843 
844 	return (0);
845 }
846 
847 /*
848  * Initialize a logical bank
849  */
850 static struct bank_info *
851 mc_add_bank(int bankid, uint64_t mask, uint64_t match, uint64_t size,
852     int dgrpid)
853 {
854 	struct bank_info *banki;
855 
856 	if ((banki = mc_node_get(bankid, bank_head)) != NULL) {
857 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: bank %d exists\n",
858 		    bankid));
859 		return (banki);
860 	}
861 
862 	banki = kmem_zalloc(sizeof (*banki), KM_SLEEP);
863 
864 	banki->bank_node.id = bankid;
865 	banki->devgrp_id = dgrpid;
866 	banki->mask = mask;
867 	banki->match = match;
868 	banki->base = match;
869 	banki->size = size;
870 
871 	mc_node_add((mc_dlist_t *)banki, &bank_head, &bank_tail);
872 
873 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: id %d mask 0x%lx match 0x%lx"
874 	    " base 0x%lx size 0x%lx\n", bankid, mask, match,
875 	    banki->base, banki->size));
876 
877 	return (banki);
878 }
879 
880 /*
881  * Use the bank's base address to find out whether to initialize a new segment,
882  * or weave the bank into an existing segment. If the tail bank of a previous
883  * segment is not continuous with the new bank, the new bank goes into a new
884  * segment.
885  */
886 static void
887 mc_add_segment(struct bank_info *banki)
888 {
889 	struct seg_info *segi;
890 	struct bank_info *tb;
891 
892 	/* does this bank start a new segment? */
893 	if ((segi = mc_node_get(seg_id, seg_head)) == NULL) {
894 		/* this should happen for the first segment only */
895 		goto new_seg;
896 	}
897 
898 	tb = segi->tail;
899 	/* discontiguous banks go into a new segment, increment the seg_id */
900 	if (banki->base > (tb->base + tb->size)) {
901 		seg_id++;
902 		goto new_seg;
903 	}
904 
905 	/* weave the bank into the segment */
906 	segi->nbanks++;
907 	tb->next = banki;
908 
909 	banki->seg_id = segi->seg_node.id;
910 	banki->local_id = tb->local_id + 1;
911 
912 	/* contiguous or interleaved? */
913 	if (banki->base != (tb->base + tb->size))
914 		segi->ifactor++;
915 
916 	segi->size += banki->size;
917 	segi->tail = banki;
918 
919 	memsize += banki->size;
920 
921 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d add bank: id %d"
922 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
923 	    banki->size));
924 
925 	return;
926 
927 new_seg:
928 	segi = kmem_zalloc(sizeof (*segi), KM_SLEEP);
929 
930 	segi->seg_node.id = seg_id;
931 	segi->nbanks = 1;
932 	segi->ifactor = 1;
933 	segi->base = banki->base;
934 	segi->size = banki->size;
935 	segi->head = banki;
936 	segi->tail = banki;
937 
938 	banki->seg_id = segi->seg_node.id;
939 	banki->local_id = 0;
940 
941 	mc_node_add((mc_dlist_t *)segi, &seg_head, &seg_tail);
942 	nsegments++;
943 
944 	memsize += banki->size;
945 
946 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d new bank: id %d"
947 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
948 	    banki->size));
949 }
950 
951 /*
952  * Returns the address bit number (row index) that controls the logical/external
953  * bank assignment in interleave of kind internal-external same dimm-pair,
954  * internal-external both dimm-pair. This is done by using the dimm-densities
955  * and part-type.
956  */
957 static int
958 get_row_shift(int row_index, struct dgrp_info *dgrp)
959 {
960 	int shift;
961 
962 	switch (dgrp->base_device) {
963 	case BASE_DEVICE_128Mb:
964 	case BASE_DEVICE_256Mb:
965 		/* 128Mb and 256Mb devices have same bank select mask */
966 		shift = ADDR_GEN_128Mb_X8_ROW_0;
967 		break;
968 	case BASE_DEVICE_512Mb:
969 	case BASE_DEVICE_1Gb:
970 		/* 512 and 1Gb devices have same bank select mask */
971 		shift = ADDR_GEN_512Mb_X8_ROW_0;
972 		break;
973 	}
974 
975 	if (dgrp->part_type == PART_TYPE_X4)
976 		shift += 1;
977 
978 	shift += row_index;
979 
980 	return (shift);
981 }
982 
983 
984 static void
985 get_device_select(int interleave, struct dgrp_info *dgrp,
986     int *ds_shift, int *bs_shift)
987 {
988 
989 	switch (interleave) {
990 	case INTERLEAVE_DISABLE:
991 	/* Fall Through */
992 	case INTERLEAVE_INTERNAL:
993 		/* Bit 33 selects the dimm group/pair */
994 		*ds_shift = DIMM_PAIR_SELECT_SHIFT;
995 		if (dgrp->nlogbanks == 2) {
996 			/* Bit 32 selects the logical bank */
997 			*bs_shift = LOG_BANK_SELECT_SHIFT;
998 		}
999 		break;
1000 	case INTERLEAVE_INTEXT_SAME_DIMM_PAIR:
1001 		/* Bit 33 selects the dimm group/pair */
1002 		*ds_shift =  DIMM_PAIR_SELECT_SHIFT;
1003 		if (dgrp->nlogbanks == 2) {
1004 			/* Row[2] selects the logical bank */
1005 			*bs_shift = get_row_shift(2, dgrp);
1006 		}
1007 		break;
1008 	case INTERLEAVE_INTEXT_BOTH_DIMM_PAIR:
1009 		if (dgrp->nlogbanks == 2) {
1010 			/* Row[3] selects the dimm group/pair */
1011 			*ds_shift = get_row_shift(3, dgrp);
1012 
1013 			/* Row[2] selects the logical bank */
1014 			*bs_shift = get_row_shift(2, dgrp);
1015 		} else {
1016 			/* Row[2] selects the dimm group/pair */
1017 			*ds_shift = get_row_shift(2, dgrp);
1018 		}
1019 		break;
1020 	}
1021 }
1022 
1023 static void
1024 mc_add_xor_banks(struct mctrl_info *mctrl,
1025     uint64_t mask, uint64_t match, int interleave)
1026 {
1027 	int i, j, nbits, nbanks;
1028 	int bankid;
1029 	int dselect[4];
1030 	int ds_shift = -1, bs_shift = -1;
1031 	uint64_t id, size, xmatch;
1032 	struct bank_info *banki;
1033 	struct dgrp_info *dgrp;
1034 
1035 	/* xor mode - assume 2 identical dimm-pairs */
1036 	if ((dgrp = mc_node_get(mctrl->devgrpids[0], dgrp_head)) == NULL) {
1037 		return;
1038 	}
1039 
1040 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1041 
1042 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1043 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1044 
1045 	/* xor enable means, bit 21 is used for dimm-pair select */
1046 	mask |= XOR_DEVICE_SELECT_MASK;
1047 	if (dgrp->nlogbanks == NLOGBANKS_PER_DGRP) {
1048 		/* bit 20 is used for logbank select */
1049 		mask |= XOR_BANK_SELECT_MASK;
1050 	}
1051 
1052 	/* find out the bits set to 1 in mask, nbits can be 2 or 4 */
1053 	nbits = 0;
1054 	for (i = 0; i <= DIMM_PAIR_SELECT_SHIFT; i++) {
1055 		if ((((mask >> i) & 1) == 1) && (nbits < 4)) {
1056 			dselect[nbits] = i;
1057 			nbits++;
1058 		}
1059 	}
1060 
1061 	/* number or banks can be 4 or 16 */
1062 	nbanks = 1 << nbits;
1063 
1064 	size = (dgrp->size * 2)/nbanks;
1065 
1066 	bankid = mctrl->mctrl_node.id * NLOGBANKS_PER_MC;
1067 
1068 	/* each bit position of the mask decides the match & base for bank */
1069 	for (i = 0; i < nbanks; i++) {
1070 		xmatch = 0;
1071 		for (j = 0; j < nbits; j++) {
1072 			xmatch |= (i & (1ULL << j)) << (dselect[j] - j);
1073 		}
1074 		/* xor ds bits to get the dimm-pair */
1075 		id = ((xmatch & (1ULL << ds_shift)) >> ds_shift) ^
1076 			((xmatch & (1ULL << XOR_DEVICE_SELECT_SHIFT)) >>
1077 			XOR_DEVICE_SELECT_SHIFT);
1078 		banki = mc_add_bank(bankid, mask, match | xmatch, size,
1079 		    mctrl->devgrpids[id]);
1080 		mc_add_segment(banki);
1081 		bankid++;
1082 	}
1083 }
1084 
1085 /*
1086  * Based on interleave, dimm-densities, part-type determine the mask
1087  * and match per bank, construct the logical layout by adding segments
1088  * and banks
1089  */
1090 static int
1091 mc_add_dgrp_banks(uint64_t bankid, uint64_t dgrpid,
1092     uint64_t mask, uint64_t match, int interleave)
1093 {
1094 	int nbanks = 0;
1095 	struct bank_info *banki;
1096 	struct dgrp_info *dgrp;
1097 	int ds_shift = -1, bs_shift = -1;
1098 	uint64_t size;
1099 	uint64_t match_save;
1100 
1101 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) == NULL) {
1102 		return (0);
1103 	}
1104 
1105 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1106 
1107 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1108 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1109 	match |= (ds_shift == -1 ? 0 : ((dgrpid & 1) << ds_shift));
1110 	match_save = match;
1111 	size = dgrp->size/dgrp->nlogbanks;
1112 
1113 	/* for bankid 0, 2, 4 .. */
1114 	match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1115 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1116 	    " mask 0x%lx bs_shift %d match 0x%lx\n",
1117 	    interleave, mask, bs_shift, match));
1118 	banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1119 	nbanks++;
1120 	mc_add_segment(banki);
1121 
1122 	if (dgrp->nlogbanks == 2) {
1123 		/*
1124 		 * Set match value to original before adding second
1125 		 * logical bank interleaving information.
1126 		 */
1127 		match = match_save;
1128 		bankid++;
1129 		match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1130 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1131 		    " mask 0x%lx shift %d match 0x%lx\n",
1132 		    interleave, mask, bs_shift, match));
1133 		banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1134 		nbanks++;
1135 		mc_add_segment(banki);
1136 	}
1137 
1138 	return (nbanks);
1139 }
1140 
1141 /*
1142  * Construct the logical layout
1143  */
1144 static void
1145 mc_logical_layout(struct mctrl_info *mctrl, struct mc_soft_state *softsp)
1146 {
1147 	int i;
1148 	uint64_t mcid, bankid, interleave, mask, match;
1149 
1150 	if (mctrl->ndevgrps == 0)
1151 		return;
1152 
1153 	mcid = mctrl->mctrl_node.id;
1154 	mask = MC_SELECT_MASK;
1155 	match = mcid << MC_SELECT_SHIFT;
1156 
1157 	interleave = (softsp->mcreg1 & MCREG1_INTERLEAVE_MASK) >>
1158 	    MCREG1_INTERLEAVE_SHIFT;
1159 
1160 	/* Two dimm pairs and xor bit set */
1161 	if (mctrl->ndevgrps == NDGRPS_PER_MC &&
1162 	    (softsp->mcreg1 & MCREG1_XOR_ENABLE)) {
1163 		mc_add_xor_banks(mctrl, mask, match, interleave);
1164 		return;
1165 	}
1166 
1167 	/*
1168 	 * For xor bit unset or only one dimm pair.
1169 	 * In one dimm pair case, even if xor bit is set, xor
1170 	 * interleaving is only taking place in dimm's internal
1171 	 * banks. Dimm and external bank select bits are the
1172 	 * same as those without xor bit set.
1173 	 */
1174 	bankid = mcid * NLOGBANKS_PER_MC;
1175 	for (i = 0; i < mctrl->ndevgrps; i++) {
1176 		bankid += mc_add_dgrp_banks(bankid, mctrl->devgrpids[i],
1177 				mask, match, interleave);
1178 	}
1179 }
1180 
1181 /*
1182  * Get the dimm-pair's size from the reg_info
1183  */
1184 static uint64_t
1185 get_devgrp_size(uint64_t start)
1186 {
1187 	int i;
1188 	uint64_t size;
1189 	uint64_t end, reg_start, reg_end;
1190 	struct memory_reg_info *regi;
1191 
1192 	/* dgrp end address */
1193 	end = start + DGRP_SIZE_MAX - 1;
1194 
1195 	regi = reg_info;
1196 	size = 0;
1197 	for (i = 0; i < nregs; i++) {
1198 		reg_start = regi->base;
1199 		reg_end = regi->base + regi->size - 1;
1200 
1201 		/* completely outside */
1202 		if ((reg_end < start) || (reg_start > end)) {
1203 			regi++;
1204 			continue;
1205 		}
1206 
1207 		/* completely inside */
1208 		if ((reg_start <= start) && (reg_end >= end)) {
1209 			return (DGRP_SIZE_MAX);
1210 		}
1211 
1212 		/* start is inside, but not the end, get the remainder */
1213 		if (reg_start < start) {
1214 			size = regi->size - (start - reg_start);
1215 			regi++;
1216 			continue;
1217 		}
1218 
1219 		/* add up size for all within range */
1220 		size += regi->size;
1221 		regi++;
1222 	}
1223 
1224 	return (size);
1225 }
1226 
1227 /*
1228  * Each device group is a pair (dimm-pair) of identical single/dual dimms.
1229  * Determine the dimm-pair's dimm-densities and part-type using the MCR-I.
1230  */
1231 static void
1232 mc_add_devgrp(int dgrpid, struct mc_soft_state *softsp)
1233 {
1234 	int i, mcid, devid, dgrpoffset;
1235 	struct dgrp_info *dgrp;
1236 	struct device_info *dev;
1237 	struct dimm_info *dimmp = (struct dimm_info *)softsp->memlayoutp;
1238 
1239 	mcid = softsp->portid;
1240 
1241 	/* add the entry on dgrp_info list */
1242 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) != NULL) {
1243 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: devgrp %d exists\n",
1244 		    dgrpid));
1245 		return;
1246 	}
1247 
1248 	dgrp = kmem_zalloc(sizeof (*dgrp), KM_SLEEP);
1249 
1250 	dgrp->dgrp_node.id = dgrpid;
1251 
1252 	/* a devgrp has identical (type & size) pair */
1253 	if ((dgrpid & 1) == 0) {
1254 		/* dimm-pair 0, 2, 4, 6 */
1255 		if (softsp->mcreg1 & MCREG1_DIMM1_BANK1)
1256 			dgrp->nlogbanks = 2;
1257 		else
1258 			dgrp->nlogbanks = 1;
1259 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN1_MASK) >>
1260 		    MCREG1_ADDRGEN1_SHIFT;
1261 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM1_MASK) >>
1262 		    MCREG1_X4DIMM1_SHIFT;
1263 	} else {
1264 		/* dimm-pair 1, 3, 5, 7 */
1265 		if (softsp->mcreg1 & MCREG1_DIMM2_BANK3)
1266 			dgrp->nlogbanks = 2;
1267 		else
1268 			dgrp->nlogbanks = 1;
1269 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN2_MASK) >>
1270 		    MCREG1_ADDRGEN2_SHIFT;
1271 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM2_MASK) >>
1272 		    MCREG1_X4DIMM2_SHIFT;
1273 	}
1274 
1275 	dgrp->base = MC_BASE(mcid) + DGRP_BASE(dgrpid);
1276 	dgrp->size = get_devgrp_size(dgrp->base);
1277 
1278 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: id %d size %ld logbanks %d"
1279 	    " base_device %d part_type %d\n", dgrpid, dgrp->size,
1280 	    dgrp->nlogbanks, dgrp->base_device, dgrp->part_type));
1281 
1282 	dgrpoffset = dgrpid % NDGRPS_PER_MC;
1283 	dgrp->ndevices = NDIMMS_PER_DGRP;
1284 	/* add the entry for the (identical) pair of dimms/device */
1285 	for (i = 0; i < NDIMMS_PER_DGRP; i++) {
1286 		devid = dgrpid * NDIMMS_PER_DGRP + i;
1287 		dgrp->deviceids[i] = devid;
1288 
1289 		if ((dev = mc_node_get(devid, device_head)) != NULL) {
1290 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: device %d "
1291 			    "exists\n", devid));
1292 			continue;
1293 		}
1294 
1295 		dev = kmem_zalloc(sizeof (*dev), KM_SLEEP);
1296 
1297 		dev->dev_node.id = devid;
1298 
1299 		dev->size = dgrp->size/2;
1300 
1301 		if (dimmp) {
1302 			(void) strncpy(dev->label, (char *)dimmp->label[
1303 			    i + NDIMMS_PER_DGRP * dgrpoffset],
1304 			    MAX_DEVLEN);
1305 
1306 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: dimm %d %s\n",
1307 			    dev->dev_node.id, dev->label));
1308 		}
1309 
1310 		mc_node_add((mc_dlist_t *)dev, &device_head, &device_tail);
1311 	}
1312 
1313 	mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1314 }
1315 
1316 /*
1317  * Construct the physical and logical layout
1318  */
1319 static void
1320 mc_construct(struct mc_soft_state *softsp)
1321 {
1322 	int i, mcid, dgrpid;
1323 	struct mctrl_info *mctrl;
1324 
1325 	mcid = softsp->portid;
1326 
1327 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mcid %d, mcreg1 0x%lx\n",
1328 	    mcid, softsp->mcreg1));
1329 
1330 	/*
1331 	 * Construct the Physical & Logical Layout
1332 	 */
1333 	mutex_enter(&mcdatamutex);
1334 
1335 	/* allocate for mctrl_info */
1336 	if ((mctrl = mc_node_get(mcid, mctrl_head)) != NULL) {
1337 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mctrl %d exists\n",
1338 		    mcid));
1339 		mutex_exit(&mcdatamutex);
1340 		return;
1341 	}
1342 
1343 	mctrl = kmem_zalloc(sizeof (*mctrl), KM_SLEEP);
1344 
1345 	mctrl->mctrl_node.id = mcid;
1346 
1347 	i = 0;
1348 	dgrpid = mcid * NDGRPS_PER_MC;
1349 	if (softsp->mcreg1 & MCREG1_DIMM1_BANK0) {
1350 		mc_add_devgrp(dgrpid, softsp);
1351 		mctrl->devgrpids[i] = dgrpid;
1352 		mctrl->ndevgrps++;
1353 		i++;
1354 	}
1355 
1356 	if (softsp->mcreg1 & MCREG1_DIMM2_BANK2) {
1357 		dgrpid++;
1358 		mc_add_devgrp(dgrpid, softsp);
1359 		mctrl->devgrpids[i] = dgrpid;
1360 		mctrl->ndevgrps++;
1361 	}
1362 
1363 	mc_logical_layout(mctrl, softsp);
1364 
1365 	mctrl->dimminfop = (struct dimm_info *)softsp->memlayoutp;
1366 
1367 	nmcs++;
1368 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1369 
1370 	mutex_exit(&mcdatamutex);
1371 
1372 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: nmcs %d memsize %ld"
1373 	    "nsegments %d\n", nmcs, memsize, nsegments));
1374 }
1375 
1376 /*
1377  * Delete nodes related to the given MC on mc, device group, device,
1378  * and bank lists. Moreover, delete corresponding segment if its connected
1379  * banks are all removed.
1380  */
1381 static void
1382 mc_delete(int mc_id)
1383 {
1384 	int i, j, dgrpid, devid, bankid;
1385 	struct mctrl_info *mctrl;
1386 	struct dgrp_info *dgrp;
1387 	struct device_info *devp;
1388 	struct seg_info *segi;
1389 	struct bank_info *banki;
1390 
1391 	mutex_enter(&mcdatamutex);
1392 
1393 	/* delete mctrl_info */
1394 	if ((mctrl = mc_node_get(mc_id, mctrl_head)) != NULL) {
1395 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1396 		kmem_free(mctrl, sizeof (*mctrl));
1397 		nmcs--;
1398 	} else
1399 		DPRINTF(MC_DESTRC_DEBUG, ("mc_delete: mctrl is not found\n"));
1400 
1401 	/* delete device groups and devices of the detached MC */
1402 	for (i = 0; i < NDGRPS_PER_MC; i++) {
1403 		dgrpid = mc_id * NDGRPS_PER_MC + i;
1404 		if (!(dgrp = mc_node_get(dgrpid, dgrp_head))) {
1405 			continue;
1406 		}
1407 
1408 		for (j = 0; j < NDIMMS_PER_DGRP; j++) {
1409 			devid = dgrpid * NDIMMS_PER_DGRP + j;
1410 			if (devp = mc_node_get(devid, device_head)) {
1411 				mc_node_del((mc_dlist_t *)devp,
1412 				    &device_head, &device_tail);
1413 				kmem_free(devp, sizeof (*devp));
1414 			} else
1415 				DPRINTF(MC_DESTRC_DEBUG,
1416 				    ("mc_delete: no dev %d\n", devid));
1417 		}
1418 
1419 		mc_node_del((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1420 		kmem_free(dgrp, sizeof (*dgrp));
1421 	}
1422 
1423 	/* delete all banks and associated segments */
1424 	for (i = 0; i < NLOGBANKS_PER_MC; i++) {
1425 		bankid = mc_id * NLOGBANKS_PER_MC + i;
1426 		if (!(banki = mc_node_get(bankid, bank_head))) {
1427 			continue;
1428 		}
1429 
1430 		/* bank and segments go together */
1431 		if (!(segi = mc_node_get(banki->seg_id, seg_head))) {
1432 			mc_node_del((mc_dlist_t *)segi, &seg_head, &seg_tail);
1433 			kmem_free(segi, sizeof (*segi));
1434 			nsegments--;
1435 		}
1436 
1437 		mc_node_del((mc_dlist_t *)banki, &bank_head, &bank_tail);
1438 		kmem_free(banki, sizeof (*banki));
1439 	}
1440 
1441 	mutex_exit(&mcdatamutex);
1442 }
1443 
1444 /*
1445  * mc_dlist is a double linking list, including unique id, and pointers to
1446  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1447  * and mctrl_info has it at the top to share the operations, add, del, and get.
1448  *
1449  * The new node is added at the tail and is not sorted.
1450  *
1451  * Input: The pointer of node to be added, head and tail of the list
1452  */
1453 
1454 static void
1455 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1456 {
1457 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1458 	    node->id, (void *) *head, (void *) *tail));
1459 
1460 	if (*head != NULL) {
1461 		node->prev = *tail;
1462 		node->next = (*tail)->next;
1463 		(*tail)->next = node;
1464 		*tail = node;
1465 	} else {
1466 		node->next = node->prev = NULL;
1467 		*head = *tail = node;
1468 	}
1469 }
1470 
1471 /*
1472  * Input: The pointer of node to be deleted, head and tail of the list
1473  *
1474  * Deleted node will be at the following positions
1475  * 1. At the tail of the list
1476  * 2. At the head of the list
1477  * 3. At the head and tail of the list, i.e. only one left.
1478  * 4. At the middle of the list
1479  */
1480 
1481 static void
1482 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1483 {
1484 	if (node->next == NULL) {
1485 		/* deleted node is at the tail of list */
1486 		*tail = node->prev;
1487 	} else {
1488 		node->next->prev = node->prev;
1489 	}
1490 
1491 	if (node->prev == NULL) {
1492 		/* deleted node is at the head of list */
1493 		*head = node->next;
1494 	} else {
1495 		node->prev->next = node->next;
1496 	}
1497 }
1498 
1499 /*
1500  * Search the list from the head of the list to match the given id
1501  * Input: id and the head of the list
1502  * Return: pointer of found node
1503  */
1504 static void *
1505 mc_node_get(int id, mc_dlist_t *head)
1506 {
1507 	mc_dlist_t *node;
1508 
1509 	node = head;
1510 	while (node != NULL) {
1511 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1512 		    node->id, id));
1513 		if (node->id == id)
1514 			break;
1515 		node = node->next;
1516 	}
1517 	return (node);
1518 }
1519 
/*
 * Memory subsystem provides a 144-bit interface (128 Data bits, 9 ECC
 * bits and 7 unused bits) via a pair of DIMMs. Mapping of Data/ECC bits
 * to a specific DIMM pin is described by the memory-layout property
 * via two tables: dimm table and pin table.
 *
 * Memory-layout property arranges data/ecc bits in the following order:
 *
 *   Bit#  143                          16 15       7 6           0
 *        |      Data[127:0]              | ECC[8:0] | Unused[6:0] |
 *
 * dimm table: 1 bit is used to store DIMM number (2 possible DIMMs) for
 *	each Data/ECC bit. Thus, it needs 18 bytes (144/8) to represent
 *	all Data/ECC bits in this table. Information is stored in big
 *	endian order, i.e. dimm_table[0] represents information for
 *	logical bit# 143 to 136.
 *
 * pin table: 1 byte is used to store pin position for each Data/ECC bit.
 *	Thus, this table is 144 bytes long. Information is stored in little
 *	endian order, i.e. pin_table[0] represents pin number of logical
 *	bit 0 and pin_table[143] contains pin number for logical bit 143
 *	(i.e. data bit# 127).
 *
 * The qwordmap table below maps the mc_get_mem_unum "synd_code" value
 * into the logical bit position assigned above by the memory-layout
 * property: codes 0..127 map to logical bits 16..143, codes 128..136 to
 * logical bits 7..15, and codes 137..143 to logical bits 4, 5, 6, 0, 1,
 * 2, 3.  The table is a read-only lookup and is therefore const.
 */

#define	QWORD_SIZE	144
static const uint8_t qwordmap[QWORD_SIZE] =
{
16,   17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
32,   33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
48,   49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
64,   65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
80,   81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
96,   97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
7,    8,   9,  10,  11,  12,  13,  14,  15,   4,   5,   6,   0,   1,   2,   3
};
1560 
1561 
1562 /* ARGSUSED */
1563 static int
1564 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1565 {
1566 	int i;
1567 	int pos_cacheline, position, index, idx4dimm;
1568 	int qwlayout = synd_code;
1569 	short offset, data;
1570 	char unum[UNUM_NAMLEN];
1571 	struct dimm_info *dimmp;
1572 	struct pin_info *pinp;
1573 	struct bank_info *bank;
1574 	struct mctrl_info *mctrl;
1575 
1576 	/*
1577 	 * Enforce old Openboot requirement for synd code, either a single-bit
1578 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1579 	 */
1580 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1581 		return (EINVAL);
1582 
1583 	unum[0] = '\0';
1584 
1585 	DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:qwlayout %d phyaddr 0x%lx\n",
1586 	    qwlayout, paddr));
1587 
1588 	/*
1589 	 * Scan all logical banks to get one responding to the physical
1590 	 * address. Then compute the index to look up dimm and pin tables
1591 	 * to generate the unmuber.
1592 	 */
1593 	mutex_enter(&mcdatamutex);
1594 	bank = (struct bank_info *)bank_head;
1595 	while (bank != NULL) {
1596 		int mcid, mcdgrpid, dimmoffset;
1597 
1598 		/*
1599 		 * Physical Address is in a bank if (Addr & Mask) == Match
1600 		 */
1601 		if ((paddr & bank->mask) != bank->match) {
1602 			bank = (struct bank_info *)bank->bank_node.next;
1603 			continue;
1604 		}
1605 
1606 		mcid = bank->bank_node.id / NLOGBANKS_PER_MC;
1607 		mctrl = mc_node_get(mcid, mctrl_head);
1608 		ASSERT(mctrl != NULL);
1609 
1610 		DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:mc %d bank %d "
1611 		    "dgrp %d\n", mcid, bank->bank_node.id, bank->devgrp_id));
1612 
1613 		mcdgrpid = bank->devgrp_id % NDGRPS_PER_MC;
1614 		dimmoffset = mcdgrpid * NDIMMS_PER_DGRP;
1615 
1616 		dimmp = (struct dimm_info *)mctrl->dimminfop;
1617 		if (dimmp == NULL) {
1618 			mutex_exit(&mcdatamutex);
1619 			return (ENXIO);
1620 		}
1621 
1622 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1623 			/*
1624 			 * single-bit error handling, we can identify specific
1625 			 * DIMM.
1626 			 */
1627 
1628 			pinp = (struct pin_info *)&dimmp->data[0];
1629 
1630 			pos_cacheline = qwordmap[qwlayout];
1631 			position = 143 - pos_cacheline;
1632 			index = position / 8;
1633 			offset = 7 - (position % 8);
1634 
1635 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:position "
1636 			    "%d\n", position));
1637 			/*
1638 			 * Trade-off: We cound't add pin number to
1639 			 * unumber string because statistic number
1640 			 * pumps up at the corresponding dimm not pin.
1641 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1642 			 * pinp->pintable[pos_cacheline]);
1643 			 */
1644 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:pin number "
1645 			    "%1u\n", (uint_t)pinp->pintable[pos_cacheline]));
1646 			data = pinp->dimmtable[index];
1647 			idx4dimm = (data >> offset) & 1;
1648 
1649 			(void) strncpy(unum,
1650 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1651 			    UNUM_NAMLEN);
1652 
1653 			DPRINTF(MC_GUNUM_DEBUG,
1654 				("mc_get_mem_unum:unum %s\n", unum));
1655 
1656 			/*
1657 			 * platform hook for adding label information to unum.
1658 			 */
1659 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, idx4dimm);
1660 		} else {
1661 			char *p = unum;
1662 			size_t res = UNUM_NAMLEN;
1663 
1664 			/*
1665 			 * multi-bit error handling, we can only identify
1666 			 * bank of DIMMs.
1667 			 */
1668 
1669 			for (i = 0; (i < NDIMMS_PER_DGRP) && (res > 0); i++) {
1670 				(void) snprintf(p, res, "%s%s",
1671 				    i == 0 ? "" : " ",
1672 				    (char *)dimmp->label[dimmoffset + i]);
1673 				res -= strlen(p);
1674 				p += strlen(p);
1675 			}
1676 
1677 			/*
1678 			 * platform hook for adding label information
1679 			 * to unum.
1680 			 */
1681 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, -1);
1682 		}
1683 		mutex_exit(&mcdatamutex);
1684 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1685 		    (strlen(unum) >= buflen)) {
1686 			return (ENOSPC);
1687 		} else {
1688 			(void) strncpy(buf, unum, UNUM_NAMLEN);
1689 			*lenp = strlen(buf);
1690 			return (0);
1691 		}
1692 	}	/* end of while loop for logic bank list */
1693 
1694 	mutex_exit(&mcdatamutex);
1695 	return (ENXIO);
1696 }
1697 
1698 static int
1699 mc_get_mem_info(int synd_code, uint64_t paddr,
1700     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1701     int *segsp, int *banksp, int *mcidp)
1702 {
1703 	struct bank_info *bankp;
1704 
1705 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1706 		return (EINVAL);
1707 
1708 	/*
1709 	 * Scan all logical banks to get one responding to the physical
1710 	 * address. Then compute the index to look up dimm and pin tables
1711 	 * to generate the unmuber.
1712 	 */
1713 	mutex_enter(&mcdatamutex);
1714 	bankp = (struct bank_info *)bank_head;
1715 	while (bankp != NULL) {
1716 		struct seg_info *segp;
1717 		int mcid;
1718 
1719 		/*
1720 		 * Physical Address is in a bank if (Addr & Mask) == Match
1721 		 */
1722 		if ((paddr & bankp->mask) != bankp->match) {
1723 			bankp = (struct bank_info *)bankp->bank_node.next;
1724 			continue;
1725 		}
1726 
1727 		mcid = bankp->bank_node.id / NLOGBANKS_PER_MC;
1728 
1729 		/*
1730 		 * Get the corresponding segment.
1731 		 */
1732 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1733 		    seg_head)) == NULL) {
1734 			mutex_exit(&mcdatamutex);
1735 			return (EFAULT);
1736 		}
1737 
1738 		*mem_sizep = memsize;
1739 		*seg_sizep = segp->size;
1740 		*bank_sizep = bankp->size;
1741 		*segsp = nsegments;
1742 		*banksp = segp->nbanks;
1743 		*mcidp = mcid;
1744 
1745 		mutex_exit(&mcdatamutex);
1746 		return (0);
1747 
1748 	}	/* end of while loop for logic bank list */
1749 
1750 	mutex_exit(&mcdatamutex);
1751 	return (ENXIO);
1752 }
1753 /*
1754  * mc-us3i driver allows a platform to add extra label
1755  * information to the unum string. If a platform implements a
1756  * kernel function called plat_add_mem_unum_label() it will be
1757  * executed. This would typically be implemented in the platmod.
1758  */
1759 static void
1760 mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm)
1761 {
1762 	if (&plat_add_mem_unum_label)
1763 		plat_add_mem_unum_label(unum, mcid, bank, dimm);
1764 }
1765