xref: /titanic_41/usr/src/uts/sun4u/io/mc-us3.c (revision fb9f9b975cb9214fec5dab37d461199adab9b964)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/conf.h>
31 #include <sys/ddi.h>
32 #include <sys/stat.h>
33 #include <sys/sunddi.h>
34 #include <sys/ddi_impldefs.h>
35 #include <sys/obpdefs.h>
36 #include <sys/cmn_err.h>
37 #include <sys/errno.h>
38 #include <sys/kmem.h>
39 #include <sys/open.h>
40 #include <sys/thread.h>
41 #include <sys/cpuvar.h>
42 #include <sys/x_call.h>
43 #include <sys/debug.h>
44 #include <sys/sysmacros.h>
45 #include <sys/ivintr.h>
46 #include <sys/intr.h>
47 #include <sys/intreg.h>
48 #include <sys/autoconf.h>
49 #include <sys/modctl.h>
50 #include <sys/spl.h>
51 #include <sys/async.h>
52 #include <sys/mc.h>
53 #include <sys/mc-us3.h>
54 #include <sys/cpu_module.h>
55 
56 /*
57  * Function prototypes
58  */
59 
60 static int mc_open(dev_t *, int, int, cred_t *);
61 static int mc_close(dev_t, int, int, cred_t *);
62 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
63 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
64 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /*
67  * Configuration data structures
68  */
/*
 * Character-device entry points.  Only open, close and ioctl are
 * implemented in this file; every other slot is filled with one of the
 * generic nulldev/nodev/nochpoll/ddi_prop_op routines.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};
89 
/*
 * Device operations.  attach/detach are provided by this file and the
 * character entry points are reached through mc_cb_ops; the driver has
 * no bus_ops.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};
103 
104 /*
105  * Driver globals
106  */
/* Soft-state anchor; set up by ddi_soft_state_init() in _init(). */
static void *mcp;
/* Number of memory controllers counted by mc_get_mcregs(). */
static int nmcs = 0;
/* NOTE(review): presumably the next segment id to hand out - confirm. */
static int seg_id = 0;
/* Number of segments; reported by MCIOC_MEMCONF and MCIOC_MEM. */
static int nsegments = 0;
/* Reported as mc_memory.size by MCIOC_MEM. */
static uint64_t memsize = 0;
/* Reported as mc_memconf.nbanks by MCIOC_MEMCONF. */
static int maxbanks = 0;

/*
 * Head/tail pointers of the segment, bank, controller, device-group and
 * device lists.  The ioctl and unum lookup code walks them with
 * mcdatamutex held.
 */
static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
static mc_dlist_t *device_head, *device_tail;

/* Held around mc_is_open and the p2get_mem_* hook updates. */
static kmutex_t	mcmutex;
/* Held while the lists and counters above are read or changed. */
static kmutex_t	mcdatamutex;
/* Nonzero while the device is open; mc_open() refuses a second open. */
static int mc_is_open = 0;
121 
122 extern struct mod_ops mod_driverops;
123 
/* Module linkage element describing this module as a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"Memory-controller: %I%",	/* module name */
	&mc_ops,			/* driver ops */
};
129 
/*
 * Linkage handed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
135 
/*
 * Forward declarations of the file-local helpers.
 */

/* Lookup routines installed into p2get_mem_unum/p2get_mem_info. */
static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
    int buflen, int *lenp);
static int mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);
/* Register read-out and layout list construction/teardown. */
static int mc_get_mcregs(struct mc_soft_state *);
static void mc_construct(int mc_id, void *dimminfop);
static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
static void mlayout_del(int mc_id);
static struct seg_info *seg_match_base(u_longlong_t base);
/* Generic mc_dlist_t list primitives. */
static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);

/*
 * These symbols are weak: mc_attach()/mc_detach() test their addresses
 * before storing to them, so the driver still works when they are absent.
 */
#pragma weak p2get_mem_unum
#pragma weak p2get_mem_info
#pragma weak plat_add_mem_unum_label
154 
155 /*
156  * These are the module initialization routines.
157  */
158 
159 int
160 _init(void)
161 {
162 	int error;
163 
164 	if ((error = ddi_soft_state_init(&mcp,
165 	    sizeof (struct mc_soft_state), 1)) != 0)
166 		return (error);
167 
168 	error =  mod_install(&modlinkage);
169 	if (error == 0) {
170 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
171 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
172 	}
173 
174 	return (error);
175 }
176 
177 int
178 _fini(void)
179 {
180 	int error;
181 
182 	if ((error = mod_remove(&modlinkage)) != 0)
183 		return (error);
184 
185 	ddi_soft_state_fini(&mcp);
186 	mutex_destroy(&mcmutex);
187 	mutex_destroy(&mcdatamutex);
188 
189 	return (0);
190 }
191 
192 int
193 _info(struct modinfo *modinfop)
194 {
195 	return (mod_info(&modlinkage, modinfop));
196 }
197 
198 static int
199 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
200 {
201 	struct mc_soft_state *softsp;
202 	struct dimm_info *dimminfop;
203 	int instance, len, err;
204 
205 	/* get the instance of this devi */
206 	instance = ddi_get_instance(devi);
207 
208 	switch (cmd) {
209 	case DDI_ATTACH:
210 		break;
211 
212 	case DDI_RESUME:
213 		/* get the soft state pointer for this device node */
214 		softsp = ddi_get_soft_state(mcp, instance);
215 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
216 		    instance));
217 		/*
218 		 * During resume, the source and target board's bank_infos
219 		 * need to be updated with the new mc MADR values.  This is
220 		 * implemented with existing functionality by first removing
221 		 * the props and allocated data structs, and then adding them
222 		 * back in.
223 		 */
224 		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
225 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
226 		    MEM_CFG_PROP_NAME) == 1) {
227 			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
228 			    MEM_CFG_PROP_NAME);
229 		}
230 		mlayout_del(softsp->portid);
231 		if (mc_get_mcregs(softsp) == -1) {
232 			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
233 			    instance);
234 		}
235 		return (DDI_SUCCESS);
236 
237 	default:
238 		return (DDI_FAILURE);
239 	}
240 
241 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
242 		return (DDI_FAILURE);
243 
244 	softsp = ddi_get_soft_state(mcp, instance);
245 
246 	/* Set the dip in the soft state */
247 	softsp->dip = devi;
248 
249 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
250 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
251 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
252 		    instance, "portid"));
253 		goto bad;
254 	}
255 
256 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
257 	    instance, softsp->portid, CPU->cpu_id));
258 
259 	/* map in the registers for this device. */
260 	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
261 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
262 		    instance));
263 		goto bad;
264 	}
265 
266 	/*
267 	 * Get the label of dimms and pin routing information at memory-layout
268 	 * property if the memory controller is enabled.
269 	 *
270 	 * Basically every memory-controller node on every machine should
271 	 * have one of these properties unless the memory controller is
272 	 * physically not capable of having memory attached to it, e.g.
273 	 * Excalibur's slave processor.
274 	 */
275 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
276 	    "memory-layout", (caddr_t)&dimminfop, &len);
277 	if (err == DDI_PROP_SUCCESS) {
278 		/*
279 		 * Set the pointer and size of property in the soft state
280 		 */
281 		softsp->memlayoutp = dimminfop;
282 		softsp->size = len;
283 	} else if (err == DDI_PROP_NOT_FOUND) {
284 		/*
285 		 * This is a disable MC. Clear out the pointer and size
286 		 * of property in the soft state
287 		 */
288 		softsp->memlayoutp = NULL;
289 		softsp->size = 0;
290 	} else {
291 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
292 		    instance, dimminfop));
293 		goto bad2;
294 	}
295 
296 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
297 	    instance, dimminfop, *(uint64_t *)dimminfop, len));
298 
299 	/* Get MC registers and construct all needed data structure */
300 	if (mc_get_mcregs(softsp) == -1)
301 		goto bad1;
302 
303 	mutex_enter(&mcmutex);
304 	if (nmcs == 1) {
305 		if (&p2get_mem_unum)
306 			p2get_mem_unum = mc_get_mem_unum;
307 		if (&p2get_mem_info)
308 			p2get_mem_info = mc_get_mem_info;
309 	}
310 	mutex_exit(&mcmutex);
311 
312 	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
313 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
314 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
315 		    " failed \n"));
316 		goto bad1;
317 	}
318 
319 	ddi_report_dev(devi);
320 	return (DDI_SUCCESS);
321 
322 bad1:
323 	/* release all allocated data struture for this MC */
324 	mlayout_del(softsp->portid);
325 	if (softsp->memlayoutp != NULL)
326 		kmem_free(softsp->memlayoutp, softsp->size);
327 
328 	/* remove the libdevinfo property */
329 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
330 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
331 	    MEM_CFG_PROP_NAME) == 1) {
332 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
333 			MEM_CFG_PROP_NAME);
334 	}
335 
336 bad2:
337 	/* unmap the registers for this device. */
338 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
339 
340 bad:
341 	ddi_soft_state_free(mcp, instance);
342 	return (DDI_FAILURE);
343 }
344 
345 /* ARGSUSED */
346 static int
347 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
348 {
349 	int instance;
350 	struct mc_soft_state *softsp;
351 
352 	/* get the instance of this devi */
353 	instance = ddi_get_instance(devi);
354 
355 	/* get the soft state pointer for this device node */
356 	softsp = ddi_get_soft_state(mcp, instance);
357 
358 	switch (cmd) {
359 	case DDI_SUSPEND:
360 		return (DDI_SUCCESS);
361 
362 	case DDI_DETACH:
363 		break;
364 
365 	default:
366 		return (DDI_FAILURE);
367 	}
368 
369 	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
370 	    instance, softsp->portid, softsp->memlayoutp));
371 
372 	/* remove the libdevinfo property */
373 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
374 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
375 	    MEM_CFG_PROP_NAME) == 1) {
376 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
377 			MEM_CFG_PROP_NAME);
378 	}
379 
380 	/* release all allocated data struture for this MC */
381 	mlayout_del(softsp->portid);
382 	if (softsp->memlayoutp != NULL)
383 		kmem_free(softsp->memlayoutp, softsp->size);
384 
385 	/* unmap the registers */
386 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
387 
388 	mutex_enter(&mcmutex);
389 	if (nmcs == 0) {
390 		if (&p2get_mem_unum)
391 			p2get_mem_unum = NULL;
392 		if (&p2get_mem_info)
393 			p2get_mem_info = NULL;
394 	}
395 	mutex_exit(&mcmutex);
396 
397 	ddi_remove_minor_node(devi, NULL);
398 
399 	/* free up the soft state */
400 	ddi_soft_state_free(mcp, instance);
401 
402 	return (DDI_SUCCESS);
403 }
404 
405 /* ARGSUSED */
406 static int
407 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
408 {
409 	int status = 0;
410 
411 	/* verify that otyp is appropriate */
412 	if (otyp != OTYP_CHR) {
413 		return (EINVAL);
414 	}
415 
416 	mutex_enter(&mcmutex);
417 	if (mc_is_open) {
418 		status = EBUSY;
419 		goto bad;
420 	}
421 	mc_is_open = 1;
422 bad:
423 	mutex_exit(&mcmutex);
424 	return (status);
425 }
426 
/*
 * close(9E) entry point.  Clears the exclusive-open flag set by mc_open()
 * so that the device can be opened again.  Always returns 0.
 */
/* ARGSUSED */
static int
mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
{
	/* mc_is_open is only touched with mcmutex held. */
	mutex_enter(&mcmutex);
	mc_is_open = 0;
	mutex_exit(&mcmutex);

	return (0);
}
437 
/*
 * ioctl(9E) entry point.
 *
 * cmd is one of MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK,
 * MCIOC_DEVGRP, MCIOC_CTRLCONF, MCIOC_CONTROL or MCIOC_ECFLUSH; any other
 * value returns EFAULT.  arg is the user address of the command's
 * structure, except for MCIOC_ECFLUSH where it is the processor id itself.
 *
 * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
 * associated with variable-length structs. If the given number is less
 * than the number in the kernel, the number is updated in the user's
 * struct and EINVAL is returned so that the user can allocate enough
 * space for it.
 *
 * The layout lists and counters are read with mcdatamutex held; the final
 * copyout of a complete reply is done after the mutex has been dropped.
 */

/* ARGSUSED */
static int
mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
	int *rval_p)
{
	size_t	size;
	struct mc_memconf mcmconf;
	struct mc_memory *mcmem, mcmem_in;
	struct mc_segment *mcseg, mcseg_in;
	struct mc_bank mcbank;
	struct mc_devgrp mcdevgrp;
	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
	struct mc_control *mccontrol, mccontrol_in;
	struct seg_info *seg = NULL;
	struct bank_info *bank = NULL;
	struct dgrp_info *dgrp = NULL;
	struct mctrl_info *mcport;
	mc_dlist_t *mctrl;
	int i, status = 0;
	cpu_t *cpu;

	switch (cmd) {
	/*
	 * input: none
	 *
	 * return    0: the driver-wide counts and sizes a caller needs in
	 *		order to size the buffers for the other commands.
	 *	EFAULT: if the copyout fails.
	 */
	case MCIOC_MEMCONF:
		mutex_enter(&mcdatamutex);

		mcmconf.nmcs = nmcs;
		mcmconf.nsegments = nsegments;
		mcmconf.nbanks = maxbanks;
		mcmconf.ndevgrps = NDGRPS;
		mcmconf.ndevs = NDIMMS;
		mcmconf.len_dev = MAX_DEVLEN;
		mcmconf.xfer_size = TRANSFER_SIZE;

		mutex_exit(&mcdatamutex);

		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
			return (EFAULT);
		return (0);

	/*
	 * input: nsegments and allocate space for various length of segmentids
	 *
	 * return    0: size, number of segments, and all segment ids,
	 *		where global and local ids are identical.
	 *	EINVAL: if the given nsegments is less than that in kernel and
	 *		nsegments of struct will be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_MEM:
		if (copyin((void *)arg, &mcmem_in,
		    sizeof (struct mc_memory)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if (mcmem_in.nsegments < nsegments) {
			/* Tell the caller how many entries are needed. */
			mcmem_in.nsegments = nsegments;
			if (copyout(&mcmem_in, (void *)arg,
			    sizeof (struct mc_memory)))
				status = EFAULT;
			else
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		/* The struct already contains room for one segmentids[]. */
		size = sizeof (struct mc_memory) + (nsegments - 1) *
		    sizeof (mcmem->segmentids[0]);
		mcmem = kmem_zalloc(size, KM_SLEEP);

		mcmem->size = memsize;
		mcmem->nsegments = nsegments;
		seg = (struct seg_info *)seg_head;
		for (i = 0; i < nsegments; i++) {
			ASSERT(seg != NULL);
			mcmem->segmentids[i].globalid = seg->seg_node.id;
			mcmem->segmentids[i].localid = seg->seg_node.id;
			seg = (struct seg_info *)seg->seg_node.next;
		}
		mutex_exit(&mcdatamutex);

		if (copyout(mcmem, (void *)arg, size))
			status = EFAULT;

		kmem_free(mcmem, size);
		return (status);

	/*
	 * input: id, nbanks and allocate space for various length of bankids
	 *
	 * return    0: base, size, number of banks, and all bank ids,
	 *		where global id is unique of all banks and local id
	 *		is only unique for mc.
	 *	EINVAL: if given nbanks is less than that in kernel and
	 *		nbanks of struct will be updated.
	 *	EFAULT: if the id isn't found or on other errors in kernel.
	 */
	case MCIOC_SEG:

		if (copyin((void *)arg, &mcseg_in,
		    sizeof (struct mc_segment)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
		    seg_head)) == NULL) {
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
			    "id %d\n", mcseg_in.id));
			mutex_exit(&mcdatamutex);
			/*
			 * NOTE(review): the other commands return EINVAL for
			 * an unknown id; this one returns EFAULT.  Confirm
			 * with the consumers before changing it.
			 */
			return (EFAULT);
		}

		if (mcseg_in.nbanks < seg->nbanks) {
			/* Tell the caller how many entries are needed. */
			mcseg_in.nbanks = seg->nbanks;
			if (copyout(&mcseg_in, (void *)arg,
			    sizeof (struct mc_segment)))
				status = EFAULT;
			else
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		/* The struct already contains room for one bankids[]. */
		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
		    sizeof (mcseg->bankids[0]);
		mcseg = kmem_zalloc(size, KM_SLEEP);

		mcseg->id = seg->seg_node.id;
		mcseg->ifactor = seg->ifactor;
		mcseg->base = seg->base;
		mcseg->size = seg->size;
		mcseg->nbanks = seg->nbanks;

		bank = seg->hb_inseg;

		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
		    seg->nbanks, seg, bank));

		/* Walk the segment's own bank chain (hb_inseg/n_inseg). */
		i = 0;
		while (bank != NULL) {
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
			    i, bank->bank_node.id));
			mcseg->bankids[i].globalid = bank->bank_node.id;
			mcseg->bankids[i++].localid =
			    bank->local_id;
			bank = bank->n_inseg;
		}
		ASSERT(i == seg->nbanks);
		mutex_exit(&mcdatamutex);

		if (copyout(mcseg, (void *)arg, size))
			status = EFAULT;

		kmem_free(mcseg, size);
		return (status);

	/*
	 * input: id
	 *
	 * return    0: mask, match, size, and devgrpid,
	 *		where global id is unique of all devgrps and local id
	 *		is only unique for mc.
	 *	EINVAL: if id isn't found
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_BANK:
		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
			return (EFAULT);

		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));

		mutex_enter(&mcdatamutex);

		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
		    bank_head)) == NULL) {
			mutex_exit(&mcdatamutex);
			return (EINVAL);
		}

		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
		    bank->bank_node.id, bank, bank->valid));

		/*
		 * If (Physical Address & MASK) == MATCH, the Physical Address
		 * is located at this bank. The lower physical address bits
		 * are at [9-6].
		 */
		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
		mcbank.match = bank->lm << MADR_LPA_SHIFT;
		mcbank.size = bank->size;
		mcbank.devgrpid.globalid = bank->devgrp_id;
		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;

		mutex_exit(&mcdatamutex);

		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
			return (EFAULT);
		return (0);

	/*
	 * input:id and allocate space for various length of deviceids
	 *
	 * return    0: size and number of devices.
	 *	EINVAL: id isn't found
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_DEVGRP:

		if (copyin((void *)arg, &mcdevgrp,
		    sizeof (struct mc_devgrp)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
		    dgrp_head)) == NULL) {
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
			    "%d\n", mcdevgrp.id));
			mutex_exit(&mcdatamutex);
			return (EINVAL);
		}

		mcdevgrp.ndevices = dgrp->ndevices;
		mcdevgrp.size = dgrp->size;

		mutex_exit(&mcdatamutex);

		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
			status = EFAULT;

		return (status);

	/*
	 * input: nmcs and allocate space for various length of mcids
	 *
	 * return    0: number of mc, and all mcids,
	 *		where global and local ids are identical.
	 *	EINVAL: if the given nmcs is less than that in kernel and
	 *		nmcs of struct will be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_CTRLCONF:
		if (copyin((void *)arg, &mcctrlconf_in,
		    sizeof (struct mc_ctrlconf)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if (mcctrlconf_in.nmcs < nmcs) {
			/* Tell the caller how many entries are needed. */
			mcctrlconf_in.nmcs = nmcs;
			if (copyout(&mcctrlconf_in, (void *)arg,
			    sizeof (struct mc_ctrlconf)))
				status = EFAULT;
			else
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		/*
		 * Cannot just use the size of the struct because it is a
		 * variable-length struct
		 */
		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
		    sizeof (mcctrlconf->mcids[0]));
		mcctrlconf = kmem_zalloc(size, KM_SLEEP);

		mcctrlconf->nmcs = nmcs;

		/* Get all MC ids and add to mcctrlconf */
		mctrl = mctrl_head;
		i = 0;
		while (mctrl != NULL) {
			mcctrlconf->mcids[i].globalid = mctrl->id;
			mcctrlconf->mcids[i].localid = mctrl->id;
			i++;
			mctrl = mctrl->next;
		}
		ASSERT(i == nmcs);

		mutex_exit(&mcdatamutex);

		if (copyout(mcctrlconf, (void *)arg, size))
			status = EFAULT;

		kmem_free(mcctrlconf, size);
		return (status);

	/*
	 * input:id, ndevgrps and allocate space for various length of devgrpids
	 *
	 * return    0: number of devgrp, and all devgrpids, where global id
	 *		is unique of all devgrps and local id is only unique
	 *		for mc.
	 *	EINVAL: either if id isn't found or if the given ndevgrps is
	 *		less than that in kernel and ndevgrps of struct will
	 *		be updated.
	 *	EFAULT: if other errors in kernel.
	 */
	case MCIOC_CONTROL:
		if (copyin((void *)arg, &mccontrol_in,
		    sizeof (struct mc_control)) != 0)
			return (EFAULT);

		mutex_enter(&mcdatamutex);
		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
		    mctrl_head)) == NULL) {
			mutex_exit(&mcdatamutex);
			return (EINVAL);
		}

		/*
		 * mcport->ndevgrps zero means Memory Controller is disabled.
		 * In that case only the fixed part of the struct is copied
		 * back (with ndevgrps set to 0) and the call succeeds.
		 */
		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
		    (mcport->ndevgrps == 0)) {
			mccontrol_in.ndevgrps = mcport->ndevgrps;
			if (copyout(&mccontrol_in, (void *)arg,
			    sizeof (struct mc_control)))
				status = EFAULT;
			else if (mcport->ndevgrps != 0)
				status = EINVAL;

			mutex_exit(&mcdatamutex);
			return (status);
		}

		/* The struct already contains room for one devgrpids[]. */
		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
		    sizeof (mccontrol->devgrpids[0]);
		mccontrol = kmem_zalloc(size, KM_SLEEP);

		mccontrol->id = mcport->mctrl_node.id;
		mccontrol->ndevgrps = mcport->ndevgrps;
		for (i = 0; i < mcport->ndevgrps; i++) {
			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
			mccontrol->devgrpids[i].localid =
			    mcport->devgrpids[i] % NDGRPS;
			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
			    *(uint64_t *)&mccontrol->devgrpids[i]));
		}
		mutex_exit(&mcdatamutex);

		if (copyout(mccontrol, (void *)arg, size))
			status = EFAULT;

		kmem_free(mccontrol, size);
		return (status);

	/*
	 * input:id
	 *
	 * return    0: CPU flushed successfully.
	 *	EINVAL: the id wasn't found
	 */
	case MCIOC_ECFLUSH:
		/* cpu_lock is held only for the lookup, not for the flush. */
		mutex_enter(&cpu_lock);
		cpu = cpu_get((processorid_t)arg);
		mutex_exit(&cpu_lock);
		if (cpu == NULL)
			return (EINVAL);

		/* Run cpu_flush_ecache on the requested CPU via cross call. */
		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);

		return (0);

	default:
		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
		return (EFAULT);
	}
}
820 
821 /*
822  * Get Memory Address Decoding Registers and construct list.
823  * flag is to workaround Cheetah's restriction where register cannot be mapped
824  * if port id(MC registers on it) == cpu id(process is running on it).
825  */
826 static int
827 mc_get_mcregs(struct mc_soft_state *softsp)
828 {
829 	int i;
830 	int err = 0;
831 	uint64_t madreg;
832 	uint64_t ma_reg_array[NBANKS];	/* there are NBANKS of madrs */
833 
834 	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
835 	mc_construct(softsp->portid, softsp->memlayoutp);
836 
837 	/*
838 	 * If memlayoutp is NULL, the Memory Controller is disable, and
839 	 * doesn't need to create any bank and segment.
840 	 */
841 	if (softsp->memlayoutp == NULL)
842 		goto exit;
843 
844 	/*
845 	 * Get the content of 4 Memory Address Decoding Registers, and
846 	 * construct lists of logical banks and segments.
847 	 */
848 	for (i = 0; i < NBANKS; i++) {
849 		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
850 		    "cpu=%d\n", softsp->mc_base, softsp->portid, CPU->cpu_id));
851 
852 		kpreempt_disable();
853 		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
854 			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
855 		else
856 			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
857 			    (i * REGOFFSET)));
858 		kpreempt_enable();
859 
860 		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
861 		    "reg=0x%lx\n", softsp->memlayoutp, madreg));
862 
863 		ma_reg_array[i] = madreg;
864 
865 		if ((err = mlayout_add(softsp->portid, i, madreg,
866 		    softsp->memlayoutp)) == -1)
867 			break;
868 	}
869 
870 	/*
871 	 * Create the logical bank property for this mc node. This
872 	 * property is an encoded array of the madr for each logical
873 	 * bank (there are NBANKS of these).
874 	 */
875 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
876 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
877 	    MEM_CFG_PROP_NAME) != 1) {
878 		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
879 			DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
880 			(caddr_t)&ma_reg_array, sizeof (ma_reg_array));
881 	}
882 
883 exit:
884 	if (!err) {
885 		mutex_enter(&mcdatamutex);
886 		nmcs++;
887 		mutex_exit(&mcdatamutex);
888 	}
889 	return (err);
890 }
891 
892 /*
893  * A cache line is composed of four quadwords with the associated ECC, the
894  * MTag along with its associated ECC. This is depicted below:
895  *
896  * |                    Data                    |   ECC   | Mtag |MTag ECC|
897  *  127                                         0 8       0 2    0 3      0
898  *
899  * synd_code will be mapped as the following order to mc_get_mem_unum.
900  *  143                                         16        7      4        0
901  *
902  * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
903  *  575         432 431         288 287         144 143		   0
904  *
 * dimm table: each bit of a cache line needs two bits to identify one of
 *      four dimms, so the table needs 144 bytes (576 * 2 / 8). The content
 *      is in big-endian order, i.e. dimm_table[0] represents bits 572 to 575.
 *
 * pin table: each bit of a cache line needs one byte to hold its pin position,
 *      where the max. is 230, so the table needs 576 bytes. The table index
 *      order is the same as the bit position in a cache line, i.e.
 *      pin_table[0] represents bit 0, Mtag ECC 0 of Quadword 3.
913  *
914  * This is a mapping from syndrome code to QuadWord Logical layout at Safari.
915  * Referring to Figure 3-4, Excalibur Architecture Manual.
916  * This table could be moved to cheetah.c if other platform teams agree with
917  * the bit layout at QuadWord.
918  */
919 
/*
 * Indexed by syndrome code (0 .. QWORD_SIZE - 1).  mc_get_mem_unum() adds
 * the looked-up value to the quadword's base offset to obtain the bit
 * position within the 576-bit cache line.
 */
static uint8_t qwordmap[] =
{
16,   17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
32,   33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
48,   49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
64,   65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
80,   81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
96,   97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
7,    8,   9,  10,  11,  12,  13,  14,  15,   4,   5,   6,   0,   1,   2,   3,
};

/* Number of entries in qwordmap; valid syndrome codes are below this. */
#define	QWORD_SIZE	144
934 
935 /* ARGSUSED */
936 static int
937 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
938 {
939 	int i, upper_pa, lower_pa, dimmoffset;
940 	int quadword, pos_cacheline, position, index, idx4dimm;
941 	int qwlayout = synd_code;
942 	short offset, data;
943 	char unum[UNUM_NAMLEN];
944 	struct dimm_info *dimmp;
945 	struct pin_info *pinp;
946 	struct bank_info *bank;
947 
948 	/*
949 	 * Enforce old Openboot requirement for synd code, either a single-bit
950 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
951 	 */
952 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
953 		return (EINVAL);
954 
955 	unum[0] = '\0';
956 
957 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
958 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
959 
960 	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));
961 
962 	/*
963 	 * Scan all logical banks to get one responding to the physical
964 	 * address. Then compute the index to look up dimm and pin tables
965 	 * to generate the unmuber.
966 	 */
967 	mutex_enter(&mcdatamutex);
968 	bank = (struct bank_info *)bank_head;
969 	while (bank != NULL) {
970 		int bankid, mcid, bankno_permc;
971 
972 		bankid = bank->bank_node.id;
973 		bankno_permc = bankid % NBANKS;
974 		mcid = bankid / NBANKS;
975 
976 		/*
977 		 * The Address Decoding logic decodes the different fields
978 		 * in the Memory Address Drcoding register to determine
979 		 * whether a particular logic bank should respond to a
980 		 * physical address.
981 		 */
982 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
983 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
984 			bank = (struct bank_info *)bank->bank_node.next;
985 			continue;
986 		}
987 
988 		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;
989 
990 		dimmp = (struct dimm_info *)bank->dimminfop;
991 		ASSERT(dimmp != NULL);
992 
993 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
994 			/*
995 			 * single-bit error handling, we can identify specific
996 			 * DIMM.
997 			 */
998 
999 			pinp = (struct pin_info *)&dimmp->data[0];
1000 
1001 			if (!dimmp->sym_flag)
1002 				pinp++;
1003 
1004 			quadword = (paddr & 0x3f) / 16;
1005 			/* or quadword = (paddr >> 4) % 4; */
1006 			pos_cacheline = ((3 - quadword) * 144) +
1007 			    qwordmap[qwlayout];
1008 			position = 575 - pos_cacheline;
1009 			index = position * 2 / 8;
1010 			offset = position % 4;
1011 
1012 			/*
1013 			 * Trade-off: We cound't add pin number to
1014 			 * unumber string because statistic number
1015 			 * pumps up at the corresponding dimm not pin.
1016 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1017 			 * pinp->pintable[pos_cacheline]);
1018 			 */
1019 			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
1020 			    (uint_t)pinp->pintable[pos_cacheline]));
1021 			data = pinp->dimmtable[index];
1022 			idx4dimm = (data >> ((3 - offset) * 2)) & 3;
1023 
1024 			(void) strncpy(unum,
1025 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1026 			    UNUM_NAMLEN);
1027 			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
1028 			/*
1029 			 * platform hook for adding label information to unum.
1030 			 */
1031 			mc_add_mem_unum_label(unum, mcid, bankno_permc,
1032 			    idx4dimm);
1033 		} else {
1034 			char *p = unum;
1035 			size_t res = UNUM_NAMLEN;
1036 
1037 			/*
1038 			 * multi-bit error handling, we can only identify
1039 			 * bank of DIMMs.
1040 			 */
1041 
1042 			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
1043 				(void) snprintf(p, res, "%s%s",
1044 				    i == 0 ? "" : " ",
1045 				    (char *)dimmp->label[dimmoffset + i]);
1046 				res -= strlen(p);
1047 				p += strlen(p);
1048 			}
1049 
1050 			/*
1051 			 * platform hook for adding label information
1052 			 * to unum.
1053 			 */
1054 			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
1055 		}
1056 		mutex_exit(&mcdatamutex);
1057 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1058 		    (strlen(unum) >= buflen)) {
1059 			return (ENOSPC);
1060 		} else {
1061 			(void) strncpy(buf, unum, buflen);
1062 			*lenp = strlen(buf);
1063 			return (0);
1064 		}
1065 	}	/* end of while loop for logic bank list */
1066 
1067 	mutex_exit(&mcdatamutex);
1068 	return (ENXIO);
1069 }
1070 
1071 static int
1072 mc_get_mem_info(int synd_code, uint64_t paddr,
1073     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1074     int *segsp, int *banksp, int *mcidp)
1075 {
1076 	int upper_pa, lower_pa;
1077 	struct bank_info *bankp;
1078 
1079 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1080 		return (EINVAL);
1081 
1082 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1083 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1084 
1085 	/*
1086 	 * Scan all logical banks to get one responding to the physical
1087 	 * address.
1088 	 */
1089 	mutex_enter(&mcdatamutex);
1090 	bankp = (struct bank_info *)bank_head;
1091 	while (bankp != NULL) {
1092 		struct seg_info *segp;
1093 		int bankid, mcid;
1094 
1095 		bankid = bankp->bank_node.id;
1096 		mcid = bankid / NBANKS;
1097 
1098 		/*
1099 		 * The Address Decoding logic decodes the different fields
1100 		 * in the Memory Address Decoding register to determine
1101 		 * whether a particular logic bank should respond to a
1102 		 * physical address.
1103 		 */
1104 		if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) |
1105 		    bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) {
1106 			bankp = (struct bank_info *)bankp->bank_node.next;
1107 			continue;
1108 		}
1109 
1110 		/*
1111 		 * Get the corresponding segment.
1112 		 */
1113 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1114 		    seg_head)) == NULL) {
1115 			mutex_exit(&mcdatamutex);
1116 			return (EFAULT);
1117 		}
1118 
1119 		*mem_sizep = memsize;
1120 		*seg_sizep = segp->size;
1121 		*bank_sizep = bankp->size;
1122 		*segsp = nsegments;
1123 		*banksp = segp->nbanks;
1124 		*mcidp = mcid;
1125 
1126 		mutex_exit(&mcdatamutex);
1127 
1128 		return (0);
1129 
1130 	}	/* end of while loop for logic bank list */
1131 
1132 	mutex_exit(&mcdatamutex);
1133 	return (ENXIO);
1134 }
1135 
1136 /*
1137  * Construct lists for an enabled MC where size of memory is 0.
1138  * The lists are connected as follows:
1139  * Attached MC -> device group list -> device list(per devgrp).
1140  */
1141 static void
1142 mc_construct(int mc_id, void *dimminfop)
1143 {
1144 	int i, j, idx, dmidx;
1145 	struct mctrl_info *mctrl;
1146 	struct dgrp_info *dgrp;
1147 	struct device_info *dev;
1148 	struct	dimm_info *dimmp = (struct  dimm_info *)dimminfop;
1149 
1150 	mutex_enter(&mcdatamutex);
1151 	/* allocate for mctrl_info and bank_info */
1152 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id,
1153 	    mctrl_head)) != NULL) {
1154 		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
1155 		mutex_exit(&mcdatamutex);
1156 		return;
1157 	}
1158 
1159 	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
1160 
1161 	/*
1162 	 * If dimminfop is NULL, the Memory Controller is disable, and
1163 	 * the number of device group will be zero.
1164 	 */
1165 	if (dimminfop == NULL) {
1166 		mctrl->mctrl_node.id = mc_id;
1167 		mctrl->ndevgrps = 0;
1168 		mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1169 		mutex_exit(&mcdatamutex);
1170 		return;
1171 	}
1172 
1173 	/* add the entry on dgrp_info list */
1174 	for (i = 0; i < NDGRPS; i++) {
1175 		idx = mc_id * NDGRPS + i;
1176 		mctrl->devgrpids[i] = idx;
1177 		if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head))
1178 		    != NULL) {
1179 			cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n",
1180 			    idx);
1181 			continue;
1182 		}
1183 
1184 		dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP);
1185 
1186 		/* add the entry on device_info list */
1187 		for (j = 0; j < NDIMMS; j++) {
1188 			dmidx = idx * NDIMMS + j;
1189 			dgrp->deviceids[j] = dmidx;
1190 			if ((dev = (struct device_info *)
1191 			    mc_node_get(dmidx, device_head)) != NULL) {
1192 				cmn_err(CE_WARN, "mc_construct: device %d "
1193 				    "exists\n", dmidx);
1194 				continue;
1195 			}
1196 			dev = kmem_zalloc(sizeof (struct device_info),
1197 			    KM_SLEEP);
1198 			dev->dev_node.id = dmidx;
1199 			dev->size = 0;
1200 			(void) strncpy(dev->label, (char *)
1201 			    dimmp->label[i * NDIMMS + j], MAX_DEVLEN);
1202 
1203 			mc_node_add((mc_dlist_t *)dev, &device_head,
1204 			    &device_tail);
1205 		}	/* for loop for constructing device_info */
1206 
1207 		dgrp->dgrp_node.id = idx;
1208 		dgrp->ndevices = NDIMMS;
1209 		dgrp->size = 0;
1210 		mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1211 
1212 	}	/* end of for loop for constructing dgrp_info list */
1213 
1214 	mctrl->mctrl_node.id = mc_id;
1215 	mctrl->ndevgrps = NDGRPS;
1216 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1217 	mutex_exit(&mcdatamutex);
1218 }
1219 
1220 /*
1221  * Construct lists for Memory Configuration at logical viewpoint.
1222  *
1223  * Retrieve information from Memory Address Decoding Register and set up
1224  * bank and segment lists. Link bank to its corresponding device group, and
1225  * update size of device group and devices. Also connect bank to the segment.
1226  *
1227  * Memory Address Decoding Register
1228  * -------------------------------------------------------------------------
1229  * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
1230  * |-----------|----------|------|---------|-----|------|-----|-----|-------|
1231  * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
1232  * -------------------------------------------------------------------------
1233  *
1234  */
1235 
1236 static int
1237 mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
1238 {
1239 	int i, dmidx, idx;
1240 	uint32_t ifactor;
1241 	int status = 0;
1242 	uint64_t size, base;
1243 	struct seg_info *seg_curr;
1244 	struct bank_info *bank_curr;
1245 	struct dgrp_info *dgrp;
1246 	struct device_info *dev;
1247 	union {
1248 		struct {
1249 			uint64_t valid	: 1;
1250 			uint64_t resrv1	: 10;
1251 			uint64_t uk	: 12;
1252 			uint64_t resrv2	: 4;
1253 			uint64_t um	: 17;
1254 			uint64_t resrv3	: 2;
1255 			uint64_t lk	: 4;
1256 			uint64_t resrv4	: 2;
1257 			uint64_t lm	: 4;
1258 			uint64_t resrv5	: 8;
1259 		} _s;
1260 		uint64_t madreg;
1261 	} mcreg;
1262 
1263 	mcreg.madreg = reg;
1264 
1265 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
1266 	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));
1267 
1268 	/* add the entry on bank_info list */
1269 	idx = mc_id * NBANKS + bank_no;
1270 
1271 	mutex_enter(&mcdatamutex);
1272 	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
1273 	    != NULL) {
1274 		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
1275 		goto exit;
1276 	}
1277 
1278 	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
1279 	bank_curr->bank_node.id = idx;
1280 	bank_curr->valid = mcreg._s.valid;
1281 	bank_curr->dimminfop = dimminfop;
1282 
1283 	if (!mcreg._s.valid) {
1284 		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1285 		goto exit;
1286 	}
1287 
1288 	/*
1289 	 * size of a logical bank = size of segment / interleave factor
1290 	 * This fomula is not only working for regular configuration,
1291 	 * i.e. number of banks at a segment equals to the max
1292 	 * interleave factor, but also for special case, say 3 bank
1293 	 * interleave. One bank is 2 way interleave and other two are
1294 	 * 4 way. So the sizes of banks are size of segment/2 and /4
1295 	 * respectively.
1296 	 */
1297 	ifactor = (mcreg._s.lk ^ 0xF) + 1;
1298 	size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
1299 	base = mcreg._s.um & ~mcreg._s.uk;
1300 	base <<= MADR_UPA_SHIFT;
1301 
1302 	bank_curr->uk = mcreg._s.uk;
1303 	bank_curr->um = mcreg._s.um;
1304 	bank_curr->lk = mcreg._s.lk;
1305 	bank_curr->lm = mcreg._s.lm;
1306 	bank_curr->size = size;
1307 
1308 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1309 	"lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
1310 	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1311 
1312 	/* connect the entry and update the size on dgrp_info list */
1313 	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
1314 	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
1315 		/* all avaiable dgrp should be linked at mc_construct */
1316 		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
1317 		kmem_free(bank_curr, sizeof (struct bank_info));
1318 		status = -1;
1319 		goto exit;
1320 	}
1321 
1322 	bank_curr->devgrp_id = idx;
1323 	dgrp->size += size;
1324 
1325 	/* Update the size of entry on device_info list */
1326 	for (i = 0; i < NDIMMS; i++) {
1327 		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
1328 		dgrp->deviceids[i] = dmidx;
1329 
1330 		/* avaiable device should be linked at mc_construct */
1331 		if ((dev = (struct device_info *)mc_node_get(dmidx,
1332 		    device_head)) == NULL) {
1333 			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
1334 			    dmidx);
1335 			kmem_free(bank_curr, sizeof (struct bank_info));
1336 			status = -1;
1337 			goto exit;
1338 		}
1339 
1340 		dev->size += (size / NDIMMS);
1341 
1342 		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
1343 		    dmidx, size));
1344 	}
1345 
1346 	/*
1347 	 * Get the segment by matching the base address, link this bank
1348 	 * to the segment. If not matched, allocate a new segment and
1349 	 * add it at segment list.
1350 	 */
1351 	if (seg_curr = seg_match_base(base)) {
1352 		seg_curr->nbanks++;
1353 		seg_curr->size += size;
1354 		if (ifactor > seg_curr->ifactor)
1355 			seg_curr->ifactor = ifactor;
1356 		bank_curr->seg_id = seg_curr->seg_node.id;
1357 	} else {
1358 		seg_curr = (struct seg_info *)
1359 		kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
1360 		bank_curr->seg_id = seg_id;
1361 		seg_curr->seg_node.id = seg_id++;
1362 		seg_curr->base = base;
1363 		seg_curr->size = size;
1364 		seg_curr->nbanks = 1;
1365 		seg_curr->ifactor = ifactor;
1366 		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);
1367 
1368 		nsegments++;
1369 	}
1370 
1371 	/* Get the local id of bank which is only unique per segment. */
1372 	bank_curr->local_id = seg_curr->nbanks - 1;
1373 
1374 	/* add bank at the end of the list; not sorted by bankid */
1375 	if (seg_curr->hb_inseg != NULL) {
1376 		bank_curr->p_inseg = seg_curr->tb_inseg;
1377 		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
1378 		seg_curr->tb_inseg->n_inseg = bank_curr;
1379 		seg_curr->tb_inseg = bank_curr;
1380 	} else {
1381 		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
1382 		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
1383 	}
1384 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
1385 	    seg_curr->seg_node.id));
1386 
1387 	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1388 
1389 	memsize += size;
1390 	if (seg_curr->nbanks > maxbanks)
1391 		maxbanks = seg_curr->nbanks;
1392 
1393 exit:
1394 	mutex_exit(&mcdatamutex);
1395 	return (status);
1396 }
1397 
1398 /*
1399  * Delete nodes related to the given MC on mc, device group, device,
1400  * and bank lists. Moreover, delete corresponding segment if its connected
1401  * banks are all removed.
1402  */
1403 static void
1404 mlayout_del(int mc_id)
1405 {
1406 	int i, j, dgrpid, devid, bankid, ndevgrps;
1407 	struct seg_info *seg;
1408 	struct bank_info *bank_curr;
1409 	struct mctrl_info *mctrl;
1410 	mc_dlist_t *dgrp_ptr;
1411 	mc_dlist_t *dev_ptr;
1412 	uint64_t base;
1413 
1414 	mutex_enter(&mcdatamutex);
1415 
1416 	/* delete mctrl_info */
1417 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
1418 	    NULL) {
1419 		ndevgrps = mctrl->ndevgrps;
1420 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1421 		kmem_free(mctrl, sizeof (struct mctrl_info));
1422 		nmcs--;
1423 
1424 		/*
1425 		 * There is no other list left for disabled MC.
1426 		 */
1427 		if (ndevgrps == 0) {
1428 			mutex_exit(&mcdatamutex);
1429 			return;
1430 		}
1431 	} else
1432 		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");
1433 
1434 	/* Delete device groups and devices of the detached MC */
1435 	for (i = 0; i < NDGRPS; i++) {
1436 		dgrpid = mc_id * NDGRPS + i;
1437 		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
1438 			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
1439 			continue;
1440 		}
1441 
1442 		for (j = 0; j < NDIMMS; j++) {
1443 			devid = dgrpid * NDIMMS + j;
1444 			if (dev_ptr = mc_node_get(devid, device_head)) {
1445 				mc_node_del(dev_ptr, &device_head,
1446 				    &device_tail);
1447 				kmem_free(dev_ptr, sizeof (struct device_info));
1448 			} else {
1449 				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
1450 				    devid);
1451 			}
1452 		}
1453 
1454 		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
1455 		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
1456 	}
1457 
1458 	/* Delete banks and segments if it has no bank */
1459 	for (i = 0; i < NBANKS; i++) {
1460 		bankid = mc_id * NBANKS + i;
1461 		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
1462 		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
1463 		    bank_head))) {
1464 			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
1465 			continue;
1466 		}
1467 
1468 		if (bank_curr->valid) {
1469 			base = bank_curr->um & ~bank_curr->uk;
1470 			base <<= MADR_UPA_SHIFT;
1471 			bank_curr->valid = 0;
1472 			memsize -= bank_curr->size;
1473 
1474 			/* Delete bank at segment and segment if no bank left */
1475 			if (!(seg = seg_match_base(base))) {
1476 				cmn_err(CE_WARN, "mlayout_del: no seg\n");
1477 				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
1478 				    &bank_tail);
1479 				kmem_free(bank_curr, sizeof (struct bank_info));
1480 				continue;
1481 			}
1482 
1483 			/* update the bank list at the segment */
1484 			if (bank_curr->n_inseg == NULL) {
1485 				/* node is at the tail of list */
1486 				seg->tb_inseg = bank_curr->p_inseg;
1487 			} else {
1488 				bank_curr->n_inseg->p_inseg =
1489 				    bank_curr->p_inseg;
1490 			}
1491 
1492 			if (bank_curr->p_inseg == NULL) {
1493 				/* node is at the head of list */
1494 				seg->hb_inseg = bank_curr->n_inseg;
1495 			} else {
1496 				bank_curr->p_inseg->n_inseg =
1497 				    bank_curr->n_inseg;
1498 			}
1499 
1500 			seg->nbanks--;
1501 			seg->size -= bank_curr->size;
1502 
1503 			if (seg->nbanks == 0) {
1504 				mc_node_del((mc_dlist_t *)seg, &seg_head,
1505 				    &seg_tail);
1506 				kmem_free(seg, sizeof (struct seg_info));
1507 				nsegments--;
1508 			}
1509 
1510 		}
1511 		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1512 		kmem_free(bank_curr, sizeof (struct bank_info));
1513 	}	/* end of for loop for four banks */
1514 
1515 	mutex_exit(&mcdatamutex);
1516 }
1517 
1518 /*
1519  * Search the segment in the list starting at seg_head by base address
1520  * input: base address
1521  * return: pointer of found segment or null if not found.
1522  */
1523 static struct seg_info *
1524 seg_match_base(u_longlong_t base)
1525 {
1526 	static struct seg_info *seg_ptr;
1527 
1528 	seg_ptr = (struct seg_info *)seg_head;
1529 	while (seg_ptr != NULL) {
1530 		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
1531 		    seg_ptr->base, base));
1532 		if (seg_ptr->base == base)
1533 			break;
1534 		seg_ptr = (struct seg_info *)seg_ptr->seg_node.next;
1535 	}
1536 	return (seg_ptr);
1537 }
1538 
1539 /*
1540  * mc_dlist is a double linking list, including unique id, and pointers to
1541  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1542  * and mctrl_info has it at the top to share the operations, add, del, and get.
1543  *
1544  * The new node is added at the tail and is not sorted.
1545  *
1546  * Input: The pointer of node to be added, head and tail of the list
1547  */
1548 
1549 static void
1550 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1551 {
1552 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1553 		node->id, *head, *tail));
1554 
1555 	if (*head != NULL) {
1556 		node->prev = *tail;
1557 		node->next = (*tail)->next;
1558 		(*tail)->next = node;
1559 		*tail = node;
1560 	} else {
1561 		node->next = node->prev = NULL;
1562 		*head = *tail = node;
1563 	}
1564 }
1565 
1566 /*
1567  * Input: The pointer of node to be deleted, head and tail of the list
1568  *
1569  * Deleted node will be at the following positions
1570  * 1. At the tail of the list
1571  * 2. At the head of the list
1572  * 3. At the head and tail of the list, i.e. only one left.
1573  * 4. At the middle of the list
1574  */
1575 
1576 static void
1577 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1578 {
1579 	if (node->next == NULL) {
1580 		/* deleted node is at the tail of list */
1581 		*tail = node->prev;
1582 	} else {
1583 		node->next->prev = node->prev;
1584 	}
1585 
1586 	if (node->prev == NULL) {
1587 		/* deleted node is at the head of list */
1588 		*head = node->next;
1589 	} else {
1590 		node->prev->next = node->next;
1591 	}
1592 }
1593 
1594 /*
1595  * Search the list from the head of the list to match the given id
1596  * Input: id and the head of the list
1597  * Return: pointer of found node
1598  */
1599 static mc_dlist_t *
1600 mc_node_get(int id, mc_dlist_t *head)
1601 {
1602 	mc_dlist_t *node;
1603 
1604 	node = head;
1605 	while (node != NULL) {
1606 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1607 		    node->id, id));
1608 		if (node->id == id)
1609 			break;
1610 		node = node->next;
1611 	}
1612 	return (node);
1613 }
1614 
1615 /*
1616  * mc-us3 driver allows a platform to add extra label
1617  * information to the unum string. If a platform implements a
1618  * kernel function called plat_add_mem_unum_label() it will be
1619  * executed. This would typically be implemented in the platmod.
1620  */
1621 static void
1622 mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
1623 {
1624 	if (&plat_add_mem_unum_label)
1625 		plat_add_mem_unum_label(buf, mcid, bank, dimm);
1626 }
1627