xref: /illumos-gate/usr/src/uts/sun4u/io/mc-us3i.c (revision 9a016c63ca347047a236dff12f0da83aac8981d1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/stat.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/obpdefs.h>
35 #include <sys/cmn_err.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/open.h>
39 #include <sys/thread.h>
40 #include <sys/cpuvar.h>
41 #include <sys/x_call.h>
42 #include <sys/debug.h>
43 #include <sys/sysmacros.h>
44 #include <sys/ivintr.h>
45 #include <sys/intr.h>
46 #include <sys/intreg.h>
47 #include <sys/autoconf.h>
48 #include <sys/modctl.h>
49 #include <sys/spl.h>
50 #include <sys/async.h>
51 #include <sys/mc.h>
52 #include <sys/mc-us3i.h>
53 #include <sys/note.h>
54 #include <sys/cpu_module.h>
55 
56 /*
57  * pm-hardware-state value
58  */
59 #define	NO_SUSPEND_RESUME	"no-suspend-resume"
60 
61 /*
62  * Function prototypes
63  */
64 
/* Character-device entry points; installed in mc_cb_ops below. */
static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
/* Autoconfiguration entry points; installed in mc_ops below. */
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
70 
71 /*
72  * Configuration data structures
73  */
/*
 * Character/block entry point table for the driver.  Only open, close and
 * ioctl are implemented by this file (mc_open, mc_close, mc_ioctl); every
 * other slot is filled with one of the generic nulldev/nodev/nochpoll
 * stubs.  The initializer is positional, so the order of the entries must
 * not be changed.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};
94 
/*
 * Device operations table.  The driver supplies its own attach and detach
 * routines (mc_attach, mc_detach) and points cb_ops at mc_cb_ops; it has
 * no bus_ops, and the remaining slots use the generic ddi_no_info/nulldev
 * stubs.  The initializer is positional, so the order of the entries must
 * not be changed.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_no_info,			/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};
108 
109 /*
110  * Driver globals
111  */
/* Soft-state anchor; set up in _init() and indexed by instance number. */
static void *mcp;
/* Count of memory controllers; reported by MCIOC_MEMCONF/MCIOC_CTRLCONF. */
static int nmcs = 0;
/* Id of the segment that mc_add_segment() is currently extending. */
static int seg_id;
/* Number of segments created so far by mc_add_segment(). */
static int nsegments;
/* Running sum of the sizes of all banks added by mc_add_segment(). */
static uint64_t	memsize;

/* Debug flag word; presumably consulted by DPRINTF -- confirm in header. */
static uint_t	mc_debug = 0;

/* Non-zero once mc_get_memory_reg_info() has succeeded (see mc_attach). */
static int getreg;
/* Number of [base, size] entries in reg_info. */
static int nregs;
/* "reg" property of the memory node, read by mc_get_memory_reg_info(). */
struct memory_reg_info *reg_info;

/* Head/tail pointers of the lists holding the driver's layout nodes. */
static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
static mc_dlist_t *device_head, *device_tail;

/* Held by attach, detach, open and close. */
static kmutex_t	mcmutex;
/* Held by mc_ioctl() while it reads the lists and counters above. */
static kmutex_t	mcdatamutex;
/* Set by mc_open(), cleared by mc_close(); allows a single open only. */
static int mc_is_open = 0;
131 
extern struct mod_ops mod_driverops;

/* Loadable-module description of this driver; refers to mc_ops above. */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"Memory-controller: %I%",	/* module name */
	&mc_ops,			/* driver ops */
};

/*
 * Linkage structure handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
145 
/* Helpers that build, query and tear down the memory layout lists. */
static int mc_get_memory_reg_info(struct mc_soft_state *softsp);
static void mc_construct(struct mc_soft_state *softsp);
static void mc_delete(int mc_id);
static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void *mc_node_get(int id, mc_dlist_t *head);
static void mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm);
/* Implementations installed into p2get_mem_unum/p2get_mem_info by attach. */
static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
    int buflen, int *lenp);
static int mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);

/*
 * These symbols are weak so that the driver can test whether they exist
 * (e.g. "if (&p2get_mem_unum)") before touching them.
 */
#pragma weak p2get_mem_unum
#pragma weak p2get_mem_info
#pragma weak plat_add_mem_unum_label
162 
/*
 * For testing only.
 * Bundles a syndrome code and physical address with a unum string buffer
 * and its length.
 */
struct test_unum {
	int		synd_code;	/* syndrome code */
	uint64_t	paddr;		/* physical address */
	char 		unum[UNUM_NAMLEN];	/* unum string buffer */
	int		len;		/* length associated with unum */
};
170 
171 /*
172  * These are the module initialization routines.
173  */
174 
175 int
176 _init(void)
177 {
178 	int error;
179 
180 	if ((error = ddi_soft_state_init(&mcp,
181 	    sizeof (struct mc_soft_state), 1)) != 0)
182 		return (error);
183 
184 	error =  mod_install(&modlinkage);
185 	if (error == 0) {
186 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
187 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
188 	}
189 
190 	return (error);
191 }
192 
193 int
194 _fini(void)
195 {
196 	int error;
197 
198 	if ((error = mod_remove(&modlinkage)) != 0)
199 		return (error);
200 
201 	ddi_soft_state_fini(&mcp);
202 	mutex_destroy(&mcmutex);
203 	mutex_destroy(&mcdatamutex);
204 	return (0);
205 }
206 
207 int
208 _info(struct modinfo *modinfop)
209 {
210 	return (mod_info(&modlinkage, modinfop));
211 }
212 
213 static int
214 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
215 {
216 	struct mc_soft_state *softsp;
217 	struct dimm_info *dimminfop;
218 	int instance, len, err;
219 	int mcreg1_len;
220 
221 	switch (cmd) {
222 	case DDI_ATTACH:
223 		break;
224 
225 	case DDI_RESUME:
226 		return (DDI_SUCCESS);
227 
228 	default:
229 		return (DDI_FAILURE);
230 	}
231 
232 	instance = ddi_get_instance(devi);
233 
234 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
235 		return (DDI_FAILURE);
236 
237 	softsp = ddi_get_soft_state(mcp, instance);
238 
239 	/* Set the dip in the soft state */
240 	softsp->dip = devi;
241 
242 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
243 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
244 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property\n",
245 		    instance, "portid"));
246 		goto bad;
247 	}
248 
249 	DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: mc %d portid %d, cpuid %d\n",
250 	    instance, softsp->portid, CPU->cpu_id));
251 
252 	/* Get the content of Memory Control Register I from obp */
253 	mcreg1_len = sizeof (uint64_t);
254 	if ((ddi_getlongprop_buf(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
255 	    "memory-control-register-1", (caddr_t)&(softsp->mcreg1),
256 	    &mcreg1_len) == DDI_PROP_SUCCESS) &&
257 	    (mcreg1_len == sizeof (uint64_t))) {
258 		softsp->mcr_read_ok = 1;
259 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d from obp: Reg1: 0x%lx\n",
260 		instance, softsp->mcreg1));
261 	}
262 
263 	/* attach fails if mcreg1 cannot be accessed */
264 	if (!softsp->mcr_read_ok) {
265 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get mcreg1\n",
266 		    instance));
267 		goto bad;
268 	}
269 
270 	/* nothing to suspend/resume here */
271 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
272 	    "pm-hardware-state", NO_SUSPEND_RESUME,
273 	    sizeof (NO_SUSPEND_RESUME));
274 
275 	/*
276 	 * Get the label of dimms and pin routing information from the
277 	 * memory-layout property of the memory controller.
278 	 */
279 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
280 	    "memory-layout", (caddr_t)&dimminfop, &len);
281 	if (err == DDI_PROP_SUCCESS && dimminfop->table_width == 1) {
282 		/* Set the pointer and size of property in the soft state */
283 		softsp->memlayoutp = dimminfop;
284 		softsp->memlayoutlen = len;
285 	} else {
286 		/*
287 		 * memory-layout property was not found or some other
288 		 * error occured, plat_get_mem_unum() will not work
289 		 * for this mc.
290 		 */
291 		softsp->memlayoutp = NULL;
292 		softsp->memlayoutlen = 0;
293 		DPRINTF(MC_ATTACH_DEBUG,
294 		    ("mc %d: missing or unsupported memory-layout property\n",
295 		    instance));
296 	}
297 
298 	mutex_enter(&mcmutex);
299 
300 	/* Get the physical segments from memory/reg, just once for all MC */
301 	if (!getreg) {
302 		if (mc_get_memory_reg_info(softsp) != 0) {
303 			goto bad1;
304 		}
305 		getreg = 1;
306 	}
307 
308 	/* Construct the physical and logical layout of the MC */
309 	mc_construct(softsp);
310 
311 	if (nmcs == 1) {
312 		if (&p2get_mem_unum)
313 			p2get_mem_unum = mc_get_mem_unum;
314 		if (&p2get_mem_info)
315 			p2get_mem_info = mc_get_mem_info;
316 	}
317 
318 	if (ddi_create_minor_node(devi, "mc-us3i", S_IFCHR, instance,
319 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
320 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
321 		    " failed \n"));
322 		goto bad1;
323 	}
324 	mutex_exit(&mcmutex);
325 
326 	ddi_report_dev(devi);
327 	return (DDI_SUCCESS);
328 
329 bad1:
330 	/* release all allocated data struture for this MC */
331 	mc_delete(softsp->portid);
332 	mutex_exit(&mcmutex);
333 	if (softsp->memlayoutp != NULL)
334 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
335 
336 bad:
337 	cmn_err(CE_WARN, "mc-us3i: attach failed for instance %d\n", instance);
338 	ddi_soft_state_free(mcp, instance);
339 	return (DDI_FAILURE);
340 }
341 
342 /* ARGSUSED */
343 static int
344 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
345 {
346 	int instance;
347 	struct mc_soft_state *softsp;
348 
349 	/* get the instance of this devi */
350 	instance = ddi_get_instance(devi);
351 
352 	/* get the soft state pointer for this device node */
353 	softsp = ddi_get_soft_state(mcp, instance);
354 
355 	switch (cmd) {
356 	case DDI_SUSPEND:
357 		return (DDI_SUCCESS);
358 
359 	case DDI_DETACH:
360 		break;
361 
362 	default:
363 		return (DDI_FAILURE);
364 	}
365 
366 	DPRINTF(MC_DETACH_DEBUG, ("mc %d DETACH: portid %d\n", instance,
367 	    softsp->portid));
368 
369 	mutex_enter(&mcmutex);
370 
371 	/* release all allocated data struture for this MC */
372 	mc_delete(softsp->portid);
373 
374 	if (softsp->memlayoutp != NULL)
375 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
376 
377 	if (nmcs == 0) {
378 		if (&p2get_mem_unum)
379 			p2get_mem_unum = NULL;
380 		if (&p2get_mem_info)
381 			p2get_mem_info = NULL;
382 	}
383 
384 	mutex_exit(&mcmutex);
385 
386 	ddi_remove_minor_node(devi, NULL);
387 	/* free up the soft state */
388 	ddi_soft_state_free(mcp, instance);
389 
390 	return (DDI_SUCCESS);
391 }
392 
393 /* ARGSUSED */
394 static int
395 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
396 {
397 	int status = 0;
398 
399 	/* verify that otyp is appropriate */
400 	if (otyp != OTYP_CHR) {
401 		return (EINVAL);
402 	}
403 
404 	mutex_enter(&mcmutex);
405 	/* At least one attached? */
406 	if (nmcs == 0) {
407 		status = ENXIO;
408 		goto bad;
409 	}
410 
411 	if (mc_is_open) {
412 		status = EBUSY;
413 		goto bad;
414 	}
415 	mc_is_open = 1;
416 bad:
417 
418 	mutex_exit(&mcmutex);
419 	return (status);
420 }
421 
422 /* ARGSUSED */
423 static int
424 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
425 {
426 	mutex_enter(&mcmutex);
427 	mc_is_open = 0;
428 	mutex_exit(&mcmutex);
429 
430 	return (0);
431 }
432 
433 /*
434  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
435  * MCIOC_CTRLCONF, MCIOC_CONTROL.
436  *
437  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
438  * associated with various length struct. If given number is less than the
439  * number in kernel, update the number and return EINVAL so that user could
440  * allocate enough space for it.
441  *
442  */
443 
444 /* ARGSUSED */
445 static int
446 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
447 	int *rval_p)
448 {
449 	size_t	size;
450 	struct mc_memconf mcmconf;
451 	struct mc_memory *mcmem, mcmem_in;
452 	struct mc_segment *mcseg, mcseg_in;
453 	struct mc_bank mcbank;
454 	struct mc_devgrp mcdevgrp;
455 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
456 	struct mc_control *mccontrol, mccontrol_in;
457 	struct seg_info *seg = NULL;
458 	struct bank_info *bank = NULL;
459 	struct dgrp_info *dgrp = NULL;
460 	struct mctrl_info *mcport;
461 	mc_dlist_t *mctrl;
462 	int i, status = 0;
463 	cpu_t *cpu;
464 
465 	switch (cmd) {
466 	case MCIOC_MEMCONF:
467 		mutex_enter(&mcdatamutex);
468 
469 		mcmconf.nmcs = nmcs;
470 		mcmconf.nsegments = nsegments;
471 		mcmconf.nbanks = NLOGBANKS_PER_SEG;
472 		mcmconf.ndevgrps = NDGRPS_PER_MC;
473 		mcmconf.ndevs = NDIMMS_PER_DGRP;
474 		mcmconf.len_dev = MAX_DEVLEN;
475 		mcmconf.xfer_size = TRANSFER_SIZE;
476 
477 		mutex_exit(&mcdatamutex);
478 
479 		if (copyout(&mcmconf, (void *)arg, sizeof (mcmconf)))
480 			return (EFAULT);
481 		return (0);
482 
483 	/*
484 	 * input: nsegments and allocate space for various length of segmentids
485 	 *
486 	 * return    0: size, number of segments, and all segment ids,
487 	 *		where glocal and local ids are identical.
488 	 *	EINVAL: if the given nsegments is less than that in kernel and
489 	 *		nsegments of struct will be updated.
490 	 *	EFAULT: if other errors in kernel.
491 	 */
492 	case MCIOC_MEM:
493 		if (copyin((void *)arg, &mcmem_in, sizeof (mcmem_in)) != 0)
494 			return (EFAULT);
495 
496 		mutex_enter(&mcdatamutex);
497 		if (mcmem_in.nsegments < nsegments) {
498 			mcmem_in.nsegments = nsegments;
499 			mutex_exit(&mcdatamutex);
500 			if (copyout(&mcmem_in, (void *)arg, sizeof (mcmem_in)))
501 				status = EFAULT;
502 			else
503 				status = EINVAL;
504 
505 			return (status);
506 		}
507 
508 		size = sizeof (*mcmem) + (nsegments - 1) *
509 		    sizeof (mcmem->segmentids[0]);
510 		mcmem = kmem_zalloc(size, KM_SLEEP);
511 
512 		mcmem->size = memsize;
513 		mcmem->nsegments = nsegments;
514 		seg = (struct seg_info *)seg_head;
515 		for (i = 0; i < nsegments; i++) {
516 			ASSERT(seg != NULL);
517 			mcmem->segmentids[i].globalid = seg->seg_node.id;
518 			mcmem->segmentids[i].localid = seg->seg_node.id;
519 			seg = (struct seg_info *)seg->seg_node.next;
520 		}
521 		mutex_exit(&mcdatamutex);
522 
523 		if (copyout(mcmem, (void *)arg, size))
524 			status = EFAULT;
525 
526 		kmem_free(mcmem, size);
527 		return (status);
528 
529 	/*
530 	 * input: id, nbanks and allocate space for various length of bankids
531 	 *
532 	 * return    0: base, size, number of banks, and all bank ids,
533 	 *		where global id is unique of all banks and local id
534 	 *		is only unique for mc.
535 	 *	EINVAL: either id isn't found or if given nbanks is less than
536 	 *		that in kernel and nbanks of struct will be updated.
537 	 *	EFAULT: if other errors in kernel.
538 	 */
539 	case MCIOC_SEG:
540 
541 		if (copyin((void *)arg, &mcseg_in, sizeof (mcseg_in)) != 0)
542 			return (EFAULT);
543 
544 		mutex_enter(&mcdatamutex);
545 		if ((seg = mc_node_get(mcseg_in.id, seg_head)) == NULL) {
546 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
547 			    "id %d\n", mcseg_in.id));
548 			mutex_exit(&mcdatamutex);
549 			return (EFAULT);
550 		}
551 
552 		if (mcseg_in.nbanks < seg->nbanks) {
553 			mcseg_in.nbanks = seg->nbanks;
554 			mutex_exit(&mcdatamutex);
555 			if (copyout(&mcseg_in, (void *)arg, sizeof (mcseg_in)))
556 				status = EFAULT;
557 			else
558 				status = EINVAL;
559 
560 			return (status);
561 		}
562 
563 		size = sizeof (*mcseg) + (seg->nbanks - 1) *
564 		    sizeof (mcseg->bankids[0]);
565 		mcseg = kmem_zalloc(size, KM_SLEEP);
566 
567 		mcseg->id = seg->seg_node.id;
568 		mcseg->ifactor = seg->ifactor;
569 		mcseg->base = seg->base;
570 		mcseg->size = seg->size;
571 		mcseg->nbanks = seg->nbanks;
572 
573 		bank = seg->head;
574 
575 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg %p bank %p\n",
576 		    seg->nbanks, (void *) seg, (void *) bank));
577 
578 		i = 0;
579 		while (bank != NULL) {
580 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
581 			    i, bank->bank_node.id));
582 			mcseg->bankids[i].globalid = bank->bank_node.id;
583 			mcseg->bankids[i++].localid = bank->local_id;
584 			bank = bank->next;
585 		}
586 		ASSERT(i == seg->nbanks);
587 		mutex_exit(&mcdatamutex);
588 
589 		if (copyout(mcseg, (void *)arg, size))
590 			status = EFAULT;
591 
592 		kmem_free(mcseg, size);
593 		return (status);
594 
595 	/*
596 	 * input: id
597 	 *
598 	 * return    0: mask, match, size, and devgrpid,
599 	 *		where global id is unique of all devgrps and local id
600 	 *		is only unique for mc.
601 	 *	EINVAL: if id isn't found
602 	 *	EFAULT: if other errors in kernel.
603 	 */
604 	case MCIOC_BANK:
605 		if (copyin((void *)arg, &mcbank, sizeof (mcbank)) != 0)
606 			return (EFAULT);
607 
608 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
609 
610 		mutex_enter(&mcdatamutex);
611 
612 		if ((bank = mc_node_get(mcbank.id, bank_head)) == NULL) {
613 			mutex_exit(&mcdatamutex);
614 			return (EINVAL);
615 		}
616 
617 		mcbank.mask = bank->mask;
618 		mcbank.match = bank->match;
619 		mcbank.size = bank->size;
620 		mcbank.devgrpid.globalid = bank->devgrp_id;
621 		mcbank.devgrpid.localid =
622 		    bank->bank_node.id % NLOGBANKS_PER_SEG;
623 
624 		mutex_exit(&mcdatamutex);
625 
626 		if (copyout(&mcbank, (void *)arg, sizeof (mcbank)))
627 			return (EFAULT);
628 		return (0);
629 
630 	/*
631 	 * input:id and allocate space for various length of deviceids
632 	 *
633 	 * return    0: size and number of devices.
634 	 *	EINVAL: id isn't found
635 	 *	EFAULT: if other errors in kernel.
636 	 */
637 	case MCIOC_DEVGRP:
638 
639 		if (copyin((void *)arg, &mcdevgrp, sizeof (mcdevgrp)) != 0)
640 			return (EFAULT);
641 
642 		mutex_enter(&mcdatamutex);
643 		if ((dgrp = mc_node_get(mcdevgrp.id, dgrp_head)) == NULL) {
644 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
645 			    "%d\n", mcdevgrp.id));
646 			mutex_exit(&mcdatamutex);
647 			return (EINVAL);
648 		}
649 
650 		mcdevgrp.ndevices = dgrp->ndevices;
651 		mcdevgrp.size = dgrp->size;
652 
653 		mutex_exit(&mcdatamutex);
654 
655 		if (copyout(&mcdevgrp, (void *)arg, sizeof (mcdevgrp)))
656 			status = EFAULT;
657 
658 		return (status);
659 
660 	/*
661 	 * input: nmcs and allocate space for various length of mcids
662 	 *
663 	 * return    0: number of mc, and all mcids,
664 	 *		where glocal and local ids are identical.
665 	 *	EINVAL: if the given nmcs is less than that in kernel and
666 	 *		nmcs of struct will be updated.
667 	 *	EFAULT: if other errors in kernel.
668 	 */
669 	case MCIOC_CTRLCONF:
670 		if (copyin((void *)arg, &mcctrlconf_in,
671 		    sizeof (mcctrlconf_in)) != 0)
672 			return (EFAULT);
673 
674 		mutex_enter(&mcdatamutex);
675 		if (mcctrlconf_in.nmcs < nmcs) {
676 			mcctrlconf_in.nmcs = nmcs;
677 			mutex_exit(&mcdatamutex);
678 			if (copyout(&mcctrlconf_in, (void *)arg,
679 			    sizeof (mcctrlconf_in)))
680 				status = EFAULT;
681 			else
682 				status = EINVAL;
683 
684 			return (status);
685 		}
686 
687 		/*
688 		 * Cannot just use the size of the struct because of the various
689 		 * length struct
690 		 */
691 		size = sizeof (*mcctrlconf) + ((nmcs - 1) *
692 		    sizeof (mcctrlconf->mcids[0]));
693 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
694 
695 		mcctrlconf->nmcs = nmcs;
696 
697 		/* Get all MC ids and add to mcctrlconf */
698 		mctrl = mctrl_head;
699 		i = 0;
700 		while (mctrl != NULL) {
701 			mcctrlconf->mcids[i].globalid = mctrl->id;
702 			mcctrlconf->mcids[i].localid = mctrl->id;
703 			i++;
704 			mctrl = mctrl->next;
705 		}
706 		ASSERT(i == nmcs);
707 
708 		mutex_exit(&mcdatamutex);
709 
710 		if (copyout(mcctrlconf, (void *)arg, size))
711 			status = EFAULT;
712 
713 		kmem_free(mcctrlconf, size);
714 		return (status);
715 
716 	/*
717 	 * input:id, ndevgrps and allocate space for various length of devgrpids
718 	 *
719 	 * return    0: number of devgrp, and all devgrpids,
720 	 *		is unique of all devgrps and local id is only unique
721 	 *		for mc.
722 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
723 	 *		less than that in kernel and ndevgrps of struct will
724 	 *		be updated.
725 	 *	EFAULT: if other errors in kernel.
726 	 */
727 	case MCIOC_CONTROL:
728 		if (copyin((void *)arg, &mccontrol_in,
729 		    sizeof (mccontrol_in)) != 0)
730 			return (EFAULT);
731 
732 		mutex_enter(&mcdatamutex);
733 		if ((mcport = mc_node_get(mccontrol_in.id,
734 		    mctrl_head)) == NULL) {
735 			mutex_exit(&mcdatamutex);
736 			return (EINVAL);
737 		}
738 
739 		/*
740 		 * mcport->ndevgrps zero means Memory Controller is disable.
741 		 */
742 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
743 		    (mcport->ndevgrps == 0)) {
744 			mccontrol_in.ndevgrps = mcport->ndevgrps;
745 			mutex_exit(&mcdatamutex);
746 			if (copyout(&mccontrol_in, (void *)arg,
747 			    sizeof (mccontrol_in)))
748 				status = EFAULT;
749 			else if (mcport->ndevgrps != 0)
750 				status = EINVAL;
751 
752 			return (status);
753 		}
754 
755 		size = sizeof (*mccontrol) + (mcport->ndevgrps - 1) *
756 		    sizeof (mccontrol->devgrpids[0]);
757 		mccontrol = kmem_zalloc(size, KM_SLEEP);
758 
759 		mccontrol->id = mcport->mctrl_node.id;
760 		mccontrol->ndevgrps = mcport->ndevgrps;
761 		for (i = 0; i < mcport->ndevgrps; i++) {
762 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
763 			mccontrol->devgrpids[i].localid =
764 			    mcport->devgrpids[i] % NDGRPS_PER_MC;
765 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %d\n",
766 			    i));
767 		}
768 		mutex_exit(&mcdatamutex);
769 
770 		if (copyout(mccontrol, (void *)arg, size))
771 			status = EFAULT;
772 
773 		kmem_free(mccontrol, size);
774 		return (status);
775 
776 	/*
777 	 * input:id
778 	 *
779 	 * return    0: CPU flushed successfully.
780 	 *	EINVAL: the id wasn't found
781 	 */
782 	case MCIOC_ECFLUSH:
783 		mutex_enter(&cpu_lock);
784 		cpu = cpu_get((processorid_t)arg);
785 		mutex_exit(&cpu_lock);
786 		if (cpu == NULL)
787 			return (EINVAL);
788 
789 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
790 
791 		return (0);
792 
793 	default:
794 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
795 		return (EFAULT);
796 	}
797 }
798 
799 /*
800  * Gets the reg property from the memory node. This provides the various
801  * memory segments, at bank-boundries, dimm-pair boundries, in the form
802  * of [base, size] pairs. Continuous segments, spanning boundries are
803  * merged into one.
804  * Returns 0 for success and -1 for failure.
805  */
806 static int
807 mc_get_memory_reg_info(struct mc_soft_state *softsp)
808 {
809 	dev_info_t *devi;
810 	int len;
811 	int i;
812 	struct memory_reg_info *mregi;
813 
814 	_NOTE(ARGUNUSED(softsp))
815 
816 	if ((devi = ddi_find_devinfo("memory", -1, 0)) == NULL) {
817 		DPRINTF(MC_REG_DEBUG,
818 		    ("mc-us3i: cannot find memory node under root\n"));
819 		return (-1);
820 	}
821 
822 	if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
823 	    "reg", (caddr_t)&reg_info, &len) != DDI_PROP_SUCCESS) {
824 		DPRINTF(MC_REG_DEBUG,
825 		    ("mc-us3i: reg undefined under memory\n"));
826 		return (-1);
827 	}
828 
829 	nregs = len/sizeof (*mregi);
830 
831 	DPRINTF(MC_REG_DEBUG, ("mc_get_memory_reg_info: nregs %d"
832 	    "reg_info %p\n", nregs, (void *) reg_info));
833 
834 	mregi = reg_info;
835 
836 	/* debug printfs  */
837 	for (i = 0; i < nregs; i++) {
838 		DPRINTF(MC_REG_DEBUG, (" [0x%lx, 0x%lx] ",
839 		    mregi->base, mregi->size));
840 		mregi++;
841 	}
842 
843 	return (0);
844 }
845 
846 /*
847  * Initialize a logical bank
848  */
849 static struct bank_info *
850 mc_add_bank(int bankid, uint64_t mask, uint64_t match, uint64_t size,
851     int dgrpid)
852 {
853 	struct bank_info *banki;
854 
855 	if ((banki = mc_node_get(bankid, bank_head)) != NULL) {
856 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: bank %d exists\n",
857 		    bankid));
858 		return (banki);
859 	}
860 
861 	banki = kmem_zalloc(sizeof (*banki), KM_SLEEP);
862 
863 	banki->bank_node.id = bankid;
864 	banki->devgrp_id = dgrpid;
865 	banki->mask = mask;
866 	banki->match = match;
867 	banki->base = match;
868 	banki->size = size;
869 
870 	mc_node_add((mc_dlist_t *)banki, &bank_head, &bank_tail);
871 
872 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: id %d mask 0x%lx match 0x%lx"
873 	    " base 0x%lx size 0x%lx\n", bankid, mask, match,
874 	    banki->base, banki->size));
875 
876 	return (banki);
877 }
878 
879 /*
880  * Use the bank's base address to find out whether to initialize a new segment,
881  * or weave the bank into an existing segment. If the tail bank of a previous
882  * segment is not continuous with the new bank, the new bank goes into a new
883  * segment.
884  */
885 static void
886 mc_add_segment(struct bank_info *banki)
887 {
888 	struct seg_info *segi;
889 	struct bank_info *tb;
890 
891 	/* does this bank start a new segment? */
892 	if ((segi = mc_node_get(seg_id, seg_head)) == NULL) {
893 		/* this should happen for the first segment only */
894 		goto new_seg;
895 	}
896 
897 	tb = segi->tail;
898 	/* discontiguous banks go into a new segment, increment the seg_id */
899 	if (banki->base > (tb->base + tb->size)) {
900 		seg_id++;
901 		goto new_seg;
902 	}
903 
904 	/* weave the bank into the segment */
905 	segi->nbanks++;
906 	tb->next = banki;
907 
908 	banki->seg_id = segi->seg_node.id;
909 	banki->local_id = tb->local_id + 1;
910 
911 	/* contiguous or interleaved? */
912 	if (banki->base != (tb->base + tb->size))
913 		segi->ifactor++;
914 
915 	segi->size += banki->size;
916 	segi->tail = banki;
917 
918 	memsize += banki->size;
919 
920 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d add bank: id %d"
921 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
922 	    banki->size));
923 
924 	return;
925 
926 new_seg:
927 	segi = kmem_zalloc(sizeof (*segi), KM_SLEEP);
928 
929 	segi->seg_node.id = seg_id;
930 	segi->nbanks = 1;
931 	segi->ifactor = 1;
932 	segi->base = banki->base;
933 	segi->size = banki->size;
934 	segi->head = banki;
935 	segi->tail = banki;
936 
937 	banki->seg_id = segi->seg_node.id;
938 	banki->local_id = 0;
939 
940 	mc_node_add((mc_dlist_t *)segi, &seg_head, &seg_tail);
941 	nsegments++;
942 
943 	memsize += banki->size;
944 
945 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d new bank: id %d"
946 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
947 	    banki->size));
948 }
949 
950 /*
951  * Returns the address bit number (row index) that controls the logical/external
952  * bank assignment in interleave of kind internal-external same dimm-pair,
953  * internal-external both dimm-pair. This is done by using the dimm-densities
954  * and part-type.
955  */
956 static int
957 get_row_shift(int row_index, struct dgrp_info *dgrp)
958 {
959 	int shift;
960 
961 	switch (dgrp->base_device) {
962 	case BASE_DEVICE_128Mb:
963 	case BASE_DEVICE_256Mb:
964 		/* 128Mb and 256Mb devices have same bank select mask */
965 		shift = ADDR_GEN_128Mb_X8_ROW_0;
966 		break;
967 	case BASE_DEVICE_512Mb:
968 	case BASE_DEVICE_1Gb:
969 		/* 512 and 1Gb devices have same bank select mask */
970 		shift = ADDR_GEN_512Mb_X8_ROW_0;
971 		break;
972 	}
973 
974 	if (dgrp->part_type == PART_TYPE_X4)
975 		shift += 1;
976 
977 	shift += row_index;
978 
979 	return (shift);
980 }
981 
982 
983 static void
984 get_device_select(int interleave, struct dgrp_info *dgrp,
985     int *ds_shift, int *bs_shift)
986 {
987 
988 	switch (interleave) {
989 	case INTERLEAVE_DISABLE:
990 	/* Fall Through */
991 	case INTERLEAVE_INTERNAL:
992 		/* Bit 33 selects the dimm group/pair */
993 		*ds_shift = DIMM_PAIR_SELECT_SHIFT;
994 		if (dgrp->nlogbanks == 2) {
995 			/* Bit 32 selects the logical bank */
996 			*bs_shift = LOG_BANK_SELECT_SHIFT;
997 		}
998 		break;
999 	case INTERLEAVE_INTEXT_SAME_DIMM_PAIR:
1000 		/* Bit 33 selects the dimm group/pair */
1001 		*ds_shift =  DIMM_PAIR_SELECT_SHIFT;
1002 		if (dgrp->nlogbanks == 2) {
1003 			/* Row[2] selects the logical bank */
1004 			*bs_shift = get_row_shift(2, dgrp);
1005 		}
1006 		break;
1007 	case INTERLEAVE_INTEXT_BOTH_DIMM_PAIR:
1008 		if (dgrp->nlogbanks == 2) {
1009 			/* Row[3] selects the dimm group/pair */
1010 			*ds_shift = get_row_shift(3, dgrp);
1011 
1012 			/* Row[2] selects the logical bank */
1013 			*bs_shift = get_row_shift(2, dgrp);
1014 		} else {
1015 			/* Row[2] selects the dimm group/pair */
1016 			*ds_shift = get_row_shift(2, dgrp);
1017 		}
1018 		break;
1019 	}
1020 }
1021 
1022 static void
1023 mc_add_xor_banks(struct mctrl_info *mctrl,
1024     uint64_t mask, uint64_t match, int interleave)
1025 {
1026 	int i, j, nbits, nbanks;
1027 	int bankid;
1028 	int dselect[4];
1029 	int ds_shift = -1, bs_shift = -1;
1030 	uint64_t id, size, xmatch;
1031 	struct bank_info *banki;
1032 	struct dgrp_info *dgrp;
1033 
1034 	/* xor mode - assume 2 identical dimm-pairs */
1035 	if ((dgrp = mc_node_get(mctrl->devgrpids[0], dgrp_head)) == NULL) {
1036 		return;
1037 	}
1038 
1039 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1040 
1041 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1042 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1043 
1044 	/* xor enable means, bit 21 is used for dimm-pair select */
1045 	mask |= XOR_DEVICE_SELECT_MASK;
1046 	if (dgrp->nlogbanks == NLOGBANKS_PER_DGRP) {
1047 		/* bit 20 is used for logbank select */
1048 		mask |= XOR_BANK_SELECT_MASK;
1049 	}
1050 
1051 	/* find out the bits set to 1 in mask, nbits can be 2 or 4 */
1052 	nbits = 0;
1053 	for (i = 0; i <= DIMM_PAIR_SELECT_SHIFT; i++) {
1054 		if ((((mask >> i) & 1) == 1) && (nbits < 4)) {
1055 			dselect[nbits] = i;
1056 			nbits++;
1057 		}
1058 	}
1059 
1060 	/* number or banks can be 4 or 16 */
1061 	nbanks = 1 << nbits;
1062 
1063 	size = (dgrp->size * 2)/nbanks;
1064 
1065 	bankid = mctrl->mctrl_node.id * NLOGBANKS_PER_MC;
1066 
1067 	/* each bit position of the mask decides the match & base for bank */
1068 	for (i = 0; i < nbanks; i++) {
1069 		xmatch = 0;
1070 		for (j = 0; j < nbits; j++) {
1071 			xmatch |= (i & (1ULL << j)) << (dselect[j] - j);
1072 		}
1073 		/* xor ds bits to get the dimm-pair */
1074 		id = ((xmatch & (1ULL << ds_shift)) >> ds_shift) ^
1075 			((xmatch & (1ULL << XOR_DEVICE_SELECT_SHIFT)) >>
1076 			XOR_DEVICE_SELECT_SHIFT);
1077 		banki = mc_add_bank(bankid, mask, match | xmatch, size,
1078 		    mctrl->devgrpids[id]);
1079 		mc_add_segment(banki);
1080 		bankid++;
1081 	}
1082 }
1083 
1084 /*
1085  * Based on interleave, dimm-densities, part-type determine the mask
1086  * and match per bank, construct the logical layout by adding segments
1087  * and banks
1088  */
1089 static int
1090 mc_add_dgrp_banks(uint64_t bankid, uint64_t dgrpid,
1091     uint64_t mask, uint64_t match, int interleave)
1092 {
1093 	int nbanks = 0;
1094 	struct bank_info *banki;
1095 	struct dgrp_info *dgrp;
1096 	int ds_shift = -1, bs_shift = -1;
1097 	uint64_t size;
1098 	uint64_t match_save;
1099 
1100 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) == NULL) {
1101 		return (0);
1102 	}
1103 
1104 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1105 
1106 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1107 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1108 	match |= (ds_shift == -1 ? 0 : ((dgrpid & 1) << ds_shift));
1109 	match_save = match;
1110 	size = dgrp->size/dgrp->nlogbanks;
1111 
1112 	/* for bankid 0, 2, 4 .. */
1113 	match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1114 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1115 	    " mask 0x%lx bs_shift %d match 0x%lx\n",
1116 	    interleave, mask, bs_shift, match));
1117 	banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1118 	nbanks++;
1119 	mc_add_segment(banki);
1120 
1121 	if (dgrp->nlogbanks == 2) {
1122 		/*
1123 		 * Set match value to original before adding second
1124 		 * logical bank interleaving information.
1125 		 */
1126 		match = match_save;
1127 		bankid++;
1128 		match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1129 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1130 		    " mask 0x%lx shift %d match 0x%lx\n",
1131 		    interleave, mask, bs_shift, match));
1132 		banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1133 		nbanks++;
1134 		mc_add_segment(banki);
1135 	}
1136 
1137 	return (nbanks);
1138 }
1139 
1140 /*
1141  * Construct the logical layout
1142  */
1143 static void
1144 mc_logical_layout(struct mctrl_info *mctrl, struct mc_soft_state *softsp)
1145 {
1146 	int i;
1147 	uint64_t mcid, bankid, interleave, mask, match;
1148 
1149 	if (mctrl->ndevgrps == 0)
1150 		return;
1151 
1152 	mcid = mctrl->mctrl_node.id;
1153 	mask = MC_SELECT_MASK;
1154 	match = mcid << MC_SELECT_SHIFT;
1155 
1156 	interleave = (softsp->mcreg1 & MCREG1_INTERLEAVE_MASK) >>
1157 	    MCREG1_INTERLEAVE_SHIFT;
1158 
1159 	/* Two dimm pairs and xor bit set */
1160 	if (mctrl->ndevgrps == NDGRPS_PER_MC &&
1161 	    (softsp->mcreg1 & MCREG1_XOR_ENABLE)) {
1162 		mc_add_xor_banks(mctrl, mask, match, interleave);
1163 		return;
1164 	}
1165 
1166 	/*
1167 	 * For xor bit unset or only one dimm pair.
1168 	 * In one dimm pair case, even if xor bit is set, xor
1169 	 * interleaving is only taking place in dimm's internal
1170 	 * banks. Dimm and external bank select bits are the
1171 	 * same as those without xor bit set.
1172 	 */
1173 	bankid = mcid * NLOGBANKS_PER_MC;
1174 	for (i = 0; i < mctrl->ndevgrps; i++) {
1175 		bankid += mc_add_dgrp_banks(bankid, mctrl->devgrpids[i],
1176 				mask, match, interleave);
1177 	}
1178 }
1179 
1180 /*
1181  * Get the dimm-pair's size from the reg_info
1182  */
1183 static uint64_t
1184 get_devgrp_size(uint64_t start)
1185 {
1186 	int i;
1187 	uint64_t size;
1188 	uint64_t end, reg_start, reg_end;
1189 	struct memory_reg_info *regi;
1190 
1191 	/* dgrp end address */
1192 	end = start + DGRP_SIZE_MAX - 1;
1193 
1194 	regi = reg_info;
1195 	size = 0;
1196 	for (i = 0; i < nregs; i++) {
1197 		reg_start = regi->base;
1198 		reg_end = regi->base + regi->size - 1;
1199 
1200 		/* completely outside */
1201 		if ((reg_end < start) || (reg_start > end)) {
1202 			regi++;
1203 			continue;
1204 		}
1205 
1206 		/* completely inside */
1207 		if ((reg_start <= start) && (reg_end >= end)) {
1208 			return (DGRP_SIZE_MAX);
1209 		}
1210 
1211 		/* start is inside, but not the end, get the remainder */
1212 		if (reg_start < start) {
1213 			size = regi->size - (start - reg_start);
1214 			regi++;
1215 			continue;
1216 		}
1217 
1218 		/* add up size for all within range */
1219 		size += regi->size;
1220 		regi++;
1221 	}
1222 
1223 	return (size);
1224 }
1225 
1226 /*
1227  * Each device group is a pair (dimm-pair) of identical single/dual dimms.
1228  * Determine the dimm-pair's dimm-densities and part-type using the MCR-I.
1229  */
1230 static void
1231 mc_add_devgrp(int dgrpid, struct mc_soft_state *softsp)
1232 {
1233 	int i, mcid, devid, dgrpoffset;
1234 	struct dgrp_info *dgrp;
1235 	struct device_info *dev;
1236 	struct dimm_info *dimmp = (struct dimm_info *)softsp->memlayoutp;
1237 
1238 	mcid = softsp->portid;
1239 
1240 	/* add the entry on dgrp_info list */
1241 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) != NULL) {
1242 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: devgrp %d exists\n",
1243 		    dgrpid));
1244 		return;
1245 	}
1246 
1247 	dgrp = kmem_zalloc(sizeof (*dgrp), KM_SLEEP);
1248 
1249 	dgrp->dgrp_node.id = dgrpid;
1250 
1251 	/* a devgrp has identical (type & size) pair */
1252 	if ((dgrpid & 1) == 0) {
1253 		/* dimm-pair 0, 2, 4, 6 */
1254 		if (softsp->mcreg1 & MCREG1_DIMM1_BANK1)
1255 			dgrp->nlogbanks = 2;
1256 		else
1257 			dgrp->nlogbanks = 1;
1258 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN1_MASK) >>
1259 		    MCREG1_ADDRGEN1_SHIFT;
1260 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM1_MASK) >>
1261 		    MCREG1_X4DIMM1_SHIFT;
1262 	} else {
1263 		/* dimm-pair 1, 3, 5, 7 */
1264 		if (softsp->mcreg1 & MCREG1_DIMM2_BANK3)
1265 			dgrp->nlogbanks = 2;
1266 		else
1267 			dgrp->nlogbanks = 1;
1268 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN2_MASK) >>
1269 		    MCREG1_ADDRGEN2_SHIFT;
1270 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM2_MASK) >>
1271 		    MCREG1_X4DIMM2_SHIFT;
1272 	}
1273 
1274 	dgrp->base = MC_BASE(mcid) + DGRP_BASE(dgrpid);
1275 	dgrp->size = get_devgrp_size(dgrp->base);
1276 
1277 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: id %d size %ld logbanks %d"
1278 	    " base_device %d part_type %d\n", dgrpid, dgrp->size,
1279 	    dgrp->nlogbanks, dgrp->base_device, dgrp->part_type));
1280 
1281 	dgrpoffset = dgrpid % NDGRPS_PER_MC;
1282 	dgrp->ndevices = NDIMMS_PER_DGRP;
1283 	/* add the entry for the (identical) pair of dimms/device */
1284 	for (i = 0; i < NDIMMS_PER_DGRP; i++) {
1285 		devid = dgrpid * NDIMMS_PER_DGRP + i;
1286 		dgrp->deviceids[i] = devid;
1287 
1288 		if ((dev = mc_node_get(devid, device_head)) != NULL) {
1289 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: device %d "
1290 			    "exists\n", devid));
1291 			continue;
1292 		}
1293 
1294 		dev = kmem_zalloc(sizeof (*dev), KM_SLEEP);
1295 
1296 		dev->dev_node.id = devid;
1297 
1298 		dev->size = dgrp->size/2;
1299 
1300 		if (dimmp) {
1301 			(void) strncpy(dev->label, (char *)dimmp->label[
1302 			    i + NDIMMS_PER_DGRP * dgrpoffset],
1303 			    MAX_DEVLEN);
1304 
1305 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: dimm %d %s\n",
1306 			    dev->dev_node.id, dev->label));
1307 		}
1308 
1309 		mc_node_add((mc_dlist_t *)dev, &device_head, &device_tail);
1310 	}
1311 
1312 	mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1313 }
1314 
1315 /*
1316  * Construct the physical and logical layout
1317  */
1318 static void
1319 mc_construct(struct mc_soft_state *softsp)
1320 {
1321 	int i, mcid, dgrpid;
1322 	struct mctrl_info *mctrl;
1323 
1324 	mcid = softsp->portid;
1325 
1326 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mcid %d, mcreg1 0x%lx\n",
1327 	    mcid, softsp->mcreg1));
1328 
1329 	/*
1330 	 * Construct the Physical & Logical Layout
1331 	 */
1332 	mutex_enter(&mcdatamutex);
1333 
1334 	/* allocate for mctrl_info */
1335 	if ((mctrl = mc_node_get(mcid, mctrl_head)) != NULL) {
1336 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mctrl %d exists\n",
1337 		    mcid));
1338 		mutex_exit(&mcdatamutex);
1339 		return;
1340 	}
1341 
1342 	mctrl = kmem_zalloc(sizeof (*mctrl), KM_SLEEP);
1343 
1344 	mctrl->mctrl_node.id = mcid;
1345 
1346 	i = 0;
1347 	dgrpid = mcid * NDGRPS_PER_MC;
1348 	if (softsp->mcreg1 & MCREG1_DIMM1_BANK0) {
1349 		mc_add_devgrp(dgrpid, softsp);
1350 		mctrl->devgrpids[i] = dgrpid;
1351 		mctrl->ndevgrps++;
1352 		i++;
1353 	}
1354 
1355 	if (softsp->mcreg1 & MCREG1_DIMM2_BANK2) {
1356 		dgrpid++;
1357 		mc_add_devgrp(dgrpid, softsp);
1358 		mctrl->devgrpids[i] = dgrpid;
1359 		mctrl->ndevgrps++;
1360 	}
1361 
1362 	mc_logical_layout(mctrl, softsp);
1363 
1364 	mctrl->dimminfop = (struct dimm_info *)softsp->memlayoutp;
1365 
1366 	nmcs++;
1367 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1368 
1369 	mutex_exit(&mcdatamutex);
1370 
1371 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: nmcs %d memsize %ld"
1372 	    "nsegments %d\n", nmcs, memsize, nsegments));
1373 }
1374 
1375 /*
1376  * Delete nodes related to the given MC on mc, device group, device,
1377  * and bank lists. Moreover, delete corresponding segment if its connected
1378  * banks are all removed.
1379  */
1380 static void
1381 mc_delete(int mc_id)
1382 {
1383 	int i, j, dgrpid, devid, bankid;
1384 	struct mctrl_info *mctrl;
1385 	struct dgrp_info *dgrp;
1386 	struct device_info *devp;
1387 	struct seg_info *segi;
1388 	struct bank_info *banki;
1389 
1390 	mutex_enter(&mcdatamutex);
1391 
1392 	/* delete mctrl_info */
1393 	if ((mctrl = mc_node_get(mc_id, mctrl_head)) != NULL) {
1394 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1395 		kmem_free(mctrl, sizeof (*mctrl));
1396 		nmcs--;
1397 	} else
1398 		DPRINTF(MC_DESTRC_DEBUG, ("mc_delete: mctrl is not found\n"));
1399 
1400 	/* delete device groups and devices of the detached MC */
1401 	for (i = 0; i < NDGRPS_PER_MC; i++) {
1402 		dgrpid = mc_id * NDGRPS_PER_MC + i;
1403 		if (!(dgrp = mc_node_get(dgrpid, dgrp_head))) {
1404 			continue;
1405 		}
1406 
1407 		for (j = 0; j < NDIMMS_PER_DGRP; j++) {
1408 			devid = dgrpid * NDIMMS_PER_DGRP + j;
1409 			if (devp = mc_node_get(devid, device_head)) {
1410 				mc_node_del((mc_dlist_t *)devp,
1411 				    &device_head, &device_tail);
1412 				kmem_free(devp, sizeof (*devp));
1413 			} else
1414 				DPRINTF(MC_DESTRC_DEBUG,
1415 				    ("mc_delete: no dev %d\n", devid));
1416 		}
1417 
1418 		mc_node_del((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1419 		kmem_free(dgrp, sizeof (*dgrp));
1420 	}
1421 
1422 	/* delete all banks and associated segments */
1423 	for (i = 0; i < NLOGBANKS_PER_MC; i++) {
1424 		bankid = mc_id * NLOGBANKS_PER_MC + i;
1425 		if (!(banki = mc_node_get(bankid, bank_head))) {
1426 			continue;
1427 		}
1428 
1429 		/* bank and segments go together */
1430 		if ((segi = mc_node_get(banki->seg_id, seg_head)) != NULL) {
1431 			mc_node_del((mc_dlist_t *)segi, &seg_head, &seg_tail);
1432 			kmem_free(segi, sizeof (*segi));
1433 			nsegments--;
1434 		}
1435 
1436 		mc_node_del((mc_dlist_t *)banki, &bank_head, &bank_tail);
1437 		kmem_free(banki, sizeof (*banki));
1438 	}
1439 
1440 	mutex_exit(&mcdatamutex);
1441 }
1442 
1443 /*
1444  * mc_dlist is a double linking list, including unique id, and pointers to
1445  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1446  * and mctrl_info has it at the top to share the operations, add, del, and get.
1447  *
1448  * The new node is added at the tail and is not sorted.
1449  *
1450  * Input: The pointer of node to be added, head and tail of the list
1451  */
1452 
1453 static void
1454 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1455 {
1456 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1457 	    node->id, (void *) *head, (void *) *tail));
1458 
1459 	if (*head != NULL) {
1460 		node->prev = *tail;
1461 		node->next = (*tail)->next;
1462 		(*tail)->next = node;
1463 		*tail = node;
1464 	} else {
1465 		node->next = node->prev = NULL;
1466 		*head = *tail = node;
1467 	}
1468 }
1469 
1470 /*
1471  * Input: The pointer of node to be deleted, head and tail of the list
1472  *
1473  * Deleted node will be at the following positions
1474  * 1. At the tail of the list
1475  * 2. At the head of the list
1476  * 3. At the head and tail of the list, i.e. only one left.
1477  * 4. At the middle of the list
1478  */
1479 
1480 static void
1481 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1482 {
1483 	if (node->next == NULL) {
1484 		/* deleted node is at the tail of list */
1485 		*tail = node->prev;
1486 	} else {
1487 		node->next->prev = node->prev;
1488 	}
1489 
1490 	if (node->prev == NULL) {
1491 		/* deleted node is at the head of list */
1492 		*head = node->next;
1493 	} else {
1494 		node->prev->next = node->next;
1495 	}
1496 }
1497 
1498 /*
1499  * Search the list from the head of the list to match the given id
1500  * Input: id and the head of the list
1501  * Return: pointer of found node
1502  */
1503 static void *
1504 mc_node_get(int id, mc_dlist_t *head)
1505 {
1506 	mc_dlist_t *node;
1507 
1508 	node = head;
1509 	while (node != NULL) {
1510 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1511 		    node->id, id));
1512 		if (node->id == id)
1513 			break;
1514 		node = node->next;
1515 	}
1516 	return (node);
1517 }
1518 
/*
 * The memory subsystem provides a 144-bit interface (128 Data bits,
 * 9 ECC bits and 7 unused bits) via a pair of DIMMs. The mapping of each
 * Data/ECC bit to a specific DIMM pin is described by the memory-layout
 * property via two tables: dimm table and pin table.
 *
 * The memory-layout property arranges data/ecc bits in this order:
 *
 *   Bit#  143                          16 15       7 6           0
 *        |      Data[127:0]              | ECC[8:0] | Unused[6:0] |
 *
 * dimm table: 1 bit stores the DIMM number (2 possible DIMMs) for each
 *	Data/ECC bit, so 18 bytes (144/8) represent all Data/ECC bits in
 *	this table. Information is stored in big endian order, i.e.
 *	dimm_table[0] represents information for logical bit# 143 to 136.
 *
 * pin table: 1 byte stores the pin position for each Data/ECC bit, so
 *	this table is 144 bytes long. Information is stored in little
 *	endian order, i.e. pin_table[0] represents the pin number of
 *	logical bit 0 and pin_table[143] contains the pin number for
 *	logical bit 143 (i.e. data bit# 127).
 *
 * The qwordmap table below maps the mc_get_mem_unum "synd_code" value
 * into the logical bit position assigned above by the memory-layout
 * property.
 */

#define	QWORD_SIZE	144
static uint8_t qwordmap[QWORD_SIZE] =
{
	/* synd_code 0 - 127: logical bits 16 - 143 (synd_code + 16) */
	 16,  17,  18,  19,  20,  21,  22,  23,
	 24,  25,  26,  27,  28,  29,  30,  31,
	 32,  33,  34,  35,  36,  37,  38,  39,
	 40,  41,  42,  43,  44,  45,  46,  47,
	 48,  49,  50,  51,  52,  53,  54,  55,
	 56,  57,  58,  59,  60,  61,  62,  63,
	 64,  65,  66,  67,  68,  69,  70,  71,
	 72,  73,  74,  75,  76,  77,  78,  79,
	 80,  81,  82,  83,  84,  85,  86,  87,
	 88,  89,  90,  91,  92,  93,  94,  95,
	 96,  97,  98,  99, 100, 101, 102, 103,
	104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	/* synd_code 128 - 143: logical bits 0 - 15 */
	  7,   8,   9,  10,  11,  12,  13,  14,
	 15,   4,   5,   6,   0,   1,   2,   3
};
1559 
1560 
1561 /* ARGSUSED */
1562 static int
1563 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1564 {
1565 	int i;
1566 	int pos_cacheline, position, index, idx4dimm;
1567 	int qwlayout = synd_code;
1568 	short offset, data;
1569 	char unum[UNUM_NAMLEN];
1570 	struct dimm_info *dimmp;
1571 	struct pin_info *pinp;
1572 	struct bank_info *bank;
1573 	struct mctrl_info *mctrl;
1574 
1575 	/*
1576 	 * Enforce old Openboot requirement for synd code, either a single-bit
1577 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1578 	 */
1579 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1580 		return (EINVAL);
1581 
1582 	unum[0] = '\0';
1583 
1584 	DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:qwlayout %d phyaddr 0x%lx\n",
1585 	    qwlayout, paddr));
1586 
1587 	/*
1588 	 * Scan all logical banks to get one responding to the physical
1589 	 * address. Then compute the index to look up dimm and pin tables
1590 	 * to generate the unmuber.
1591 	 */
1592 	mutex_enter(&mcdatamutex);
1593 	bank = (struct bank_info *)bank_head;
1594 	while (bank != NULL) {
1595 		int mcid, mcdgrpid, dimmoffset;
1596 
1597 		/*
1598 		 * Physical Address is in a bank if (Addr & Mask) == Match
1599 		 */
1600 		if ((paddr & bank->mask) != bank->match) {
1601 			bank = (struct bank_info *)bank->bank_node.next;
1602 			continue;
1603 		}
1604 
1605 		mcid = bank->bank_node.id / NLOGBANKS_PER_MC;
1606 		mctrl = mc_node_get(mcid, mctrl_head);
1607 		ASSERT(mctrl != NULL);
1608 
1609 		DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:mc %d bank %d "
1610 		    "dgrp %d\n", mcid, bank->bank_node.id, bank->devgrp_id));
1611 
1612 		mcdgrpid = bank->devgrp_id % NDGRPS_PER_MC;
1613 		dimmoffset = mcdgrpid * NDIMMS_PER_DGRP;
1614 
1615 		dimmp = (struct dimm_info *)mctrl->dimminfop;
1616 		if (dimmp == NULL) {
1617 			mutex_exit(&mcdatamutex);
1618 			return (ENXIO);
1619 		}
1620 
1621 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1622 			/*
1623 			 * single-bit error handling, we can identify specific
1624 			 * DIMM.
1625 			 */
1626 
1627 			pinp = (struct pin_info *)&dimmp->data[0];
1628 
1629 			pos_cacheline = qwordmap[qwlayout];
1630 			position = 143 - pos_cacheline;
1631 			index = position / 8;
1632 			offset = 7 - (position % 8);
1633 
1634 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:position "
1635 			    "%d\n", position));
1636 			/*
1637 			 * Trade-off: We cound't add pin number to
1638 			 * unumber string because statistic number
1639 			 * pumps up at the corresponding dimm not pin.
1640 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1641 			 * pinp->pintable[pos_cacheline]);
1642 			 */
1643 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:pin number "
1644 			    "%1u\n", (uint_t)pinp->pintable[pos_cacheline]));
1645 			data = pinp->dimmtable[index];
1646 			idx4dimm = (data >> offset) & 1;
1647 
1648 			(void) strncpy(unum,
1649 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1650 			    UNUM_NAMLEN);
1651 
1652 			DPRINTF(MC_GUNUM_DEBUG,
1653 				("mc_get_mem_unum:unum %s\n", unum));
1654 
1655 			/*
1656 			 * platform hook for adding label information to unum.
1657 			 */
1658 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, idx4dimm);
1659 		} else {
1660 			char *p = unum;
1661 			size_t res = UNUM_NAMLEN;
1662 
1663 			/*
1664 			 * multi-bit error handling, we can only identify
1665 			 * bank of DIMMs.
1666 			 */
1667 
1668 			for (i = 0; (i < NDIMMS_PER_DGRP) && (res > 0); i++) {
1669 				(void) snprintf(p, res, "%s%s",
1670 				    i == 0 ? "" : " ",
1671 				    (char *)dimmp->label[dimmoffset + i]);
1672 				res -= strlen(p);
1673 				p += strlen(p);
1674 			}
1675 
1676 			/*
1677 			 * platform hook for adding label information
1678 			 * to unum.
1679 			 */
1680 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, -1);
1681 		}
1682 		mutex_exit(&mcdatamutex);
1683 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1684 		    (strlen(unum) >= buflen)) {
1685 			return (ENOSPC);
1686 		} else {
1687 			(void) strncpy(buf, unum, UNUM_NAMLEN);
1688 			*lenp = strlen(buf);
1689 			return (0);
1690 		}
1691 	}	/* end of while loop for logic bank list */
1692 
1693 	mutex_exit(&mcdatamutex);
1694 	return (ENXIO);
1695 }
1696 
1697 static int
1698 mc_get_mem_info(int synd_code, uint64_t paddr,
1699     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1700     int *segsp, int *banksp, int *mcidp)
1701 {
1702 	struct bank_info *bankp;
1703 
1704 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1705 		return (EINVAL);
1706 
1707 	/*
1708 	 * Scan all logical banks to get one responding to the physical
1709 	 * address. Then compute the index to look up dimm and pin tables
1710 	 * to generate the unmuber.
1711 	 */
1712 	mutex_enter(&mcdatamutex);
1713 	bankp = (struct bank_info *)bank_head;
1714 	while (bankp != NULL) {
1715 		struct seg_info *segp;
1716 		int mcid;
1717 
1718 		/*
1719 		 * Physical Address is in a bank if (Addr & Mask) == Match
1720 		 */
1721 		if ((paddr & bankp->mask) != bankp->match) {
1722 			bankp = (struct bank_info *)bankp->bank_node.next;
1723 			continue;
1724 		}
1725 
1726 		mcid = bankp->bank_node.id / NLOGBANKS_PER_MC;
1727 
1728 		/*
1729 		 * Get the corresponding segment.
1730 		 */
1731 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1732 		    seg_head)) == NULL) {
1733 			mutex_exit(&mcdatamutex);
1734 			return (EFAULT);
1735 		}
1736 
1737 		*mem_sizep = memsize;
1738 		*seg_sizep = segp->size;
1739 		*bank_sizep = bankp->size;
1740 		*segsp = nsegments;
1741 		*banksp = segp->nbanks;
1742 		*mcidp = mcid;
1743 
1744 		mutex_exit(&mcdatamutex);
1745 		return (0);
1746 
1747 	}	/* end of while loop for logic bank list */
1748 
1749 	mutex_exit(&mcdatamutex);
1750 	return (ENXIO);
1751 }
1752 /*
1753  * mc-us3i driver allows a platform to add extra label
1754  * information to the unum string. If a platform implements a
1755  * kernel function called plat_add_mem_unum_label() it will be
1756  * executed. This would typically be implemented in the platmod.
1757  */
1758 static void
1759 mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm)
1760 {
1761 	if (&plat_add_mem_unum_label)
1762 		plat_add_mem_unum_label(unum, mcid, bank, dimm);
1763 }
1764