xref: /titanic_44/usr/src/uts/sun4u/io/mc-us3.c (revision b00044a2eb43864b8718585d21949611a2ee59ef)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/conf.h>
28 #include <sys/ddi.h>
29 #include <sys/stat.h>
30 #include <sys/sunddi.h>
31 #include <sys/ddi_impldefs.h>
32 #include <sys/obpdefs.h>
33 #include <sys/cmn_err.h>
34 #include <sys/errno.h>
35 #include <sys/kmem.h>
36 #include <sys/open.h>
37 #include <sys/thread.h>
38 #include <sys/cpuvar.h>
39 #include <sys/x_call.h>
40 #include <sys/debug.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ivintr.h>
43 #include <sys/intr.h>
44 #include <sys/intreg.h>
45 #include <sys/autoconf.h>
46 #include <sys/modctl.h>
47 #include <sys/spl.h>
48 #include <sys/async.h>
49 #include <sys/mc.h>
50 #include <sys/mc-us3.h>
51 #include <sys/cpu_module.h>
52 #include <sys/platform_module.h>
53 
54 /*
55  * Function prototypes
56  */
57 
58 static int mc_open(dev_t *, int, int, cred_t *);
59 static int mc_close(dev_t, int, int, cred_t *);
60 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
61 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
62 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
63 
64 /*
65  * Configuration data structures
66  */
67 static struct cb_ops mc_cb_ops = {
68 	mc_open,			/* open */
69 	mc_close,			/* close */
70 	nulldev,			/* strategy */
71 	nulldev,			/* print */
72 	nodev,				/* dump */
73 	nulldev,			/* read */
74 	nulldev,			/* write */
75 	mc_ioctl,			/* ioctl */
76 	nodev,				/* devmap */
77 	nodev,				/* mmap */
78 	nodev,				/* segmap */
79 	nochpoll,			/* poll */
80 	ddi_prop_op,			/* cb_prop_op */
81 	0,				/* streamtab */
82 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
83 	CB_REV,				/* rev */
84 	nodev,				/* cb_aread */
85 	nodev				/* cb_awrite */
86 };
87 
88 static struct dev_ops mc_ops = {
89 	DEVO_REV,			/* rev */
90 	0,				/* refcnt  */
91 	ddi_getinfo_1to1,		/* getinfo */
92 	nulldev,			/* identify */
93 	nulldev,			/* probe */
94 	mc_attach,			/* attach */
95 	mc_detach,			/* detach */
96 	nulldev,			/* reset */
97 	&mc_cb_ops,			/* cb_ops */
98 	(struct bus_ops *)0,		/* bus_ops */
99 	nulldev				/* power */
100 };
101 
102 /*
103  * Driver globals
104  */
105 static void *mcp;
106 static int nmcs = 0;
107 static int seg_id = 0;
108 static int nsegments = 0;
109 static uint64_t memsize = 0;
110 static int maxbanks = 0;
111 
112 static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
113 static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
114 static mc_dlist_t *device_head, *device_tail;
115 
116 static kmutex_t	mcmutex;
117 static kmutex_t	mcdatamutex;
118 
119 static krwlock_t mcdimmsids_rw;
120 
121 /* pointer to cache of DIMM serial ids */
122 static dimm_sid_cache_t	*mc_dimm_sids;
123 static int		max_entries;
124 
125 extern struct mod_ops mod_driverops;
126 
127 static struct modldrv modldrv = {
128 	&mod_driverops,			/* module type, this one is a driver */
129 	"Memory-controller",		/* module name */
130 	&mc_ops,			/* driver ops */
131 };
132 
133 static struct modlinkage modlinkage = {
134 	MODREV_1,		/* rev */
135 	(void *)&modldrv,
136 	NULL
137 };
138 
139 static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
140     int buflen, int *lenp);
141 static int mc_get_mem_info(int synd_code, uint64_t paddr,
142     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
143     int *segsp, int *banksp, int *mcidp);
144 static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp);
145 static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
146 static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr);
147 static int mc_init_sid_cache(void);
148 static int mc_get_mcregs(struct mc_soft_state *);
149 static void mc_construct(int mc_id, void *dimminfop);
150 static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
151 static void mlayout_del(int mc_id, int delete);
152 static struct seg_info *seg_match_base(u_longlong_t base);
153 static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
154 static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
155 static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
156 static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);
157 static int mc_populate_sid_cache(void);
158 static int mc_get_sid_cache_index(int mcid);
159 static void mc_update_bank(struct bank_info *bank);
160 
161 #pragma weak p2get_mem_unum
162 #pragma weak p2get_mem_info
163 #pragma weak p2get_mem_sid
164 #pragma weak p2get_mem_offset
165 #pragma	weak p2get_mem_addr
166 #pragma weak p2init_sid_cache
167 #pragma weak plat_add_mem_unum_label
168 #pragma weak plat_alloc_sid_cache
169 #pragma weak plat_populate_sid_cache
170 
171 #define	QWORD_SIZE		144
172 #define	QWORD_SIZE_BYTES	(QWORD_SIZE / 8)
173 
174 /*
175  * These are the module initialization routines.
176  */
177 
178 int
179 _init(void)
180 {
181 	int error;
182 
183 	if ((error = ddi_soft_state_init(&mcp,
184 	    sizeof (struct mc_soft_state), 1)) != 0)
185 		return (error);
186 
187 	error =  mod_install(&modlinkage);
188 	if (error == 0) {
189 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
190 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
191 		rw_init(&mcdimmsids_rw, NULL, RW_DRIVER, NULL);
192 	}
193 
194 	return (error);
195 }
196 
197 int
198 _fini(void)
199 {
200 	int error;
201 
202 	if ((error = mod_remove(&modlinkage)) != 0)
203 		return (error);
204 
205 	ddi_soft_state_fini(&mcp);
206 	mutex_destroy(&mcmutex);
207 	mutex_destroy(&mcdatamutex);
208 	rw_destroy(&mcdimmsids_rw);
209 
210 	if (mc_dimm_sids)
211 		kmem_free(mc_dimm_sids, sizeof (dimm_sid_cache_t) *
212 		    max_entries);
213 
214 	return (0);
215 }
216 
217 int
218 _info(struct modinfo *modinfop)
219 {
220 	return (mod_info(&modlinkage, modinfop));
221 }
222 
223 static int
224 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
225 {
226 	struct mc_soft_state *softsp;
227 	struct dimm_info *dimminfop;
228 	int instance, len, err;
229 
230 	/* get the instance of this devi */
231 	instance = ddi_get_instance(devi);
232 
233 	switch (cmd) {
234 	case DDI_ATTACH:
235 		break;
236 
237 	case DDI_RESUME:
238 		/* get the soft state pointer for this device node */
239 		softsp = ddi_get_soft_state(mcp, instance);
240 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
241 		    instance));
242 		/*
243 		 * During resume, the source and target board's bank_infos
244 		 * need to be updated with the new mc MADR values.  This is
245 		 * implemented with existing functionality by first removing
246 		 * the props and allocated data structs, and then adding them
247 		 * back in.
248 		 */
249 		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
250 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
251 		    MEM_CFG_PROP_NAME) == 1) {
252 			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
253 			    MEM_CFG_PROP_NAME);
254 		}
255 		mlayout_del(softsp->portid, 0);
256 		if (mc_get_mcregs(softsp) == -1) {
257 			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
258 			    instance);
259 		}
260 		return (DDI_SUCCESS);
261 
262 	default:
263 		return (DDI_FAILURE);
264 	}
265 
266 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
267 		return (DDI_FAILURE);
268 
269 	softsp = ddi_get_soft_state(mcp, instance);
270 
271 	/* Set the dip in the soft state */
272 	softsp->dip = devi;
273 
274 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
275 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
276 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
277 		    instance, "portid"));
278 		goto bad;
279 	}
280 
281 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
282 	    instance, softsp->portid, CPU->cpu_id));
283 
284 	/* map in the registers for this device. */
285 	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
286 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
287 		    instance));
288 		goto bad;
289 	}
290 
291 	/*
292 	 * Get the label of dimms and pin routing information at memory-layout
293 	 * property if the memory controller is enabled.
294 	 *
295 	 * Basically every memory-controller node on every machine should
296 	 * have one of these properties unless the memory controller is
297 	 * physically not capable of having memory attached to it, e.g.
298 	 * Excalibur's slave processor.
299 	 */
300 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
301 	    "memory-layout", (caddr_t)&dimminfop, &len);
302 	if (err == DDI_PROP_SUCCESS) {
303 		/*
304 		 * Set the pointer and size of property in the soft state
305 		 */
306 		softsp->memlayoutp = dimminfop;
307 		softsp->size = len;
308 	} else if (err == DDI_PROP_NOT_FOUND) {
309 		/*
310 		 * This is a disable MC. Clear out the pointer and size
311 		 * of property in the soft state
312 		 */
313 		softsp->memlayoutp = NULL;
314 		softsp->size = 0;
315 	} else {
316 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
317 		    instance, (void *)dimminfop));
318 		goto bad2;
319 	}
320 
321 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
322 	    instance, (void *)dimminfop, *(uint64_t *)dimminfop, len));
323 
324 	/* Get MC registers and construct all needed data structure */
325 	if (mc_get_mcregs(softsp) == -1)
326 		goto bad1;
327 
328 	mutex_enter(&mcmutex);
329 	if (nmcs == 1) {
330 		if (&p2get_mem_unum)
331 			p2get_mem_unum = mc_get_mem_unum;
332 		if (&p2get_mem_info)
333 			p2get_mem_info = mc_get_mem_info;
334 		if (&p2get_mem_sid)
335 			p2get_mem_sid = mc_get_mem_sid;
336 		if (&p2get_mem_offset)
337 			p2get_mem_offset = mc_get_mem_offset;
338 		if (&p2get_mem_addr)
339 			p2get_mem_addr = mc_get_mem_addr;
340 		if (&p2init_sid_cache)
341 			p2init_sid_cache = mc_init_sid_cache;
342 	}
343 
344 	mutex_exit(&mcmutex);
345 
346 	/*
347 	 * Update DIMM serial id information if the DIMM serial id
348 	 * cache has already been initialized.
349 	 */
350 	if (mc_dimm_sids) {
351 		rw_enter(&mcdimmsids_rw, RW_WRITER);
352 		(void) mc_populate_sid_cache();
353 		rw_exit(&mcdimmsids_rw);
354 	}
355 
356 	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
357 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
358 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
359 		    " failed \n"));
360 		goto bad1;
361 	}
362 
363 	ddi_report_dev(devi);
364 	return (DDI_SUCCESS);
365 
366 bad1:
367 	/* release all allocated data struture for this MC */
368 	mlayout_del(softsp->portid, 0);
369 	if (softsp->memlayoutp != NULL)
370 		kmem_free(softsp->memlayoutp, softsp->size);
371 
372 	/* remove the libdevinfo property */
373 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
374 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
375 	    MEM_CFG_PROP_NAME) == 1) {
376 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
377 		    MEM_CFG_PROP_NAME);
378 	}
379 
380 bad2:
381 	/* unmap the registers for this device. */
382 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
383 
384 bad:
385 	ddi_soft_state_free(mcp, instance);
386 	return (DDI_FAILURE);
387 }
388 
389 /* ARGSUSED */
390 static int
391 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
392 {
393 	int instance;
394 	struct mc_soft_state *softsp;
395 
396 	/* get the instance of this devi */
397 	instance = ddi_get_instance(devi);
398 
399 	/* get the soft state pointer for this device node */
400 	softsp = ddi_get_soft_state(mcp, instance);
401 
402 	switch (cmd) {
403 	case DDI_SUSPEND:
404 		return (DDI_SUCCESS);
405 
406 	case DDI_DETACH:
407 		break;
408 
409 	default:
410 		return (DDI_FAILURE);
411 	}
412 
413 	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
414 	    instance, softsp->portid, softsp->memlayoutp));
415 
416 	/* remove the libdevinfo property */
417 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
418 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
419 	    MEM_CFG_PROP_NAME) == 1) {
420 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
421 		    MEM_CFG_PROP_NAME);
422 	}
423 
424 	/* release all allocated data struture for this MC */
425 	mlayout_del(softsp->portid, 1);
426 	if (softsp->memlayoutp != NULL)
427 		kmem_free(softsp->memlayoutp, softsp->size);
428 
429 	/* unmap the registers */
430 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
431 
432 	mutex_enter(&mcmutex);
433 	if (nmcs == 0) {
434 		if (&p2get_mem_unum)
435 			p2get_mem_unum = NULL;
436 		if (&p2get_mem_info)
437 			p2get_mem_info = NULL;
438 		if (&p2get_mem_sid)
439 			p2get_mem_sid = NULL;
440 		if (&p2get_mem_offset)
441 			p2get_mem_offset = NULL;
442 		if (&p2get_mem_addr)
443 			p2get_mem_addr = NULL;
444 		if (&p2init_sid_cache)
445 			p2init_sid_cache = NULL;
446 	}
447 
448 	mutex_exit(&mcmutex);
449 
450 	ddi_remove_minor_node(devi, NULL);
451 
452 	/* free up the soft state */
453 	ddi_soft_state_free(mcp, instance);
454 
455 	return (DDI_SUCCESS);
456 }
457 
458 /* ARGSUSED */
459 static int
460 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
461 {
462 
463 	/* verify that otyp is appropriate */
464 	if (otyp != OTYP_CHR) {
465 		return (EINVAL);
466 	}
467 
468 	return (0);
469 }
470 
471 /* ARGSUSED */
472 static int
473 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
474 {
475 	return (0);
476 }
477 
478 /*
479  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
480  * MCIOC_CTRLCONF, MCIOC_CONTROL.
481  *
482  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
483  * associated with various length struct. If given number is less than the
484  * number in kernel, update the number and return EINVAL so that user could
485  * allocate enough space for it.
486  *
487  */
488 
489 /* ARGSUSED */
490 static int
491 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
492 	int *rval_p)
493 {
494 	size_t	size;
495 	struct mc_memconf mcmconf;
496 	struct mc_memory *mcmem, mcmem_in;
497 	struct mc_segment *mcseg, mcseg_in;
498 	struct mc_bank mcbank;
499 	struct mc_devgrp mcdevgrp;
500 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
501 	struct mc_control *mccontrol, mccontrol_in;
502 	struct seg_info *seg = NULL;
503 	struct bank_info *bank = NULL;
504 	struct dgrp_info *dgrp = NULL;
505 	struct mctrl_info *mcport;
506 	mc_dlist_t *mctrl;
507 	int i, status = 0;
508 	cpu_t *cpu;
509 
510 	switch (cmd) {
511 	case MCIOC_MEMCONF:
512 		mutex_enter(&mcdatamutex);
513 
514 		mcmconf.nmcs = nmcs;
515 		mcmconf.nsegments = nsegments;
516 		mcmconf.nbanks = maxbanks;
517 		mcmconf.ndevgrps = NDGRPS;
518 		mcmconf.ndevs = NDIMMS;
519 		mcmconf.len_dev = MAX_DEVLEN;
520 		mcmconf.xfer_size = TRANSFER_SIZE;
521 
522 		mutex_exit(&mcdatamutex);
523 
524 		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
525 			return (EFAULT);
526 		return (0);
527 
528 	/*
529 	 * input: nsegments and allocate space for various length of segmentids
530 	 *
531 	 * return    0: size, number of segments, and all segment ids,
532 	 *		where glocal and local ids are identical.
533 	 *	EINVAL: if the given nsegments is less than that in kernel and
534 	 *		nsegments of struct will be updated.
535 	 *	EFAULT: if other errors in kernel.
536 	 */
537 	case MCIOC_MEM:
538 		if (copyin((void *)arg, &mcmem_in,
539 		    sizeof (struct mc_memory)) != 0)
540 			return (EFAULT);
541 
542 		mutex_enter(&mcdatamutex);
543 		if (mcmem_in.nsegments < nsegments) {
544 			mcmem_in.nsegments = nsegments;
545 			if (copyout(&mcmem_in, (void *)arg,
546 			    sizeof (struct mc_memory)))
547 				status = EFAULT;
548 			else
549 				status = EINVAL;
550 
551 			mutex_exit(&mcdatamutex);
552 			return (status);
553 		}
554 
555 		size = sizeof (struct mc_memory) + (nsegments - 1) *
556 		    sizeof (mcmem->segmentids[0]);
557 		mcmem = kmem_zalloc(size, KM_SLEEP);
558 
559 		mcmem->size = memsize;
560 		mcmem->nsegments = nsegments;
561 		seg = (struct seg_info *)seg_head;
562 		for (i = 0; i < nsegments; i++) {
563 			ASSERT(seg != NULL);
564 			mcmem->segmentids[i].globalid = seg->seg_node.id;
565 			mcmem->segmentids[i].localid = seg->seg_node.id;
566 			seg = (struct seg_info *)seg->seg_node.next;
567 		}
568 		mutex_exit(&mcdatamutex);
569 
570 		if (copyout(mcmem, (void *)arg, size))
571 			status = EFAULT;
572 
573 		kmem_free(mcmem, size);
574 		return (status);
575 
576 	/*
577 	 * input: id, nbanks and allocate space for various length of bankids
578 	 *
579 	 * return    0: base, size, number of banks, and all bank ids,
580 	 *		where global id is unique of all banks and local id
581 	 *		is only unique for mc.
582 	 *	EINVAL: either id isn't found or if given nbanks is less than
583 	 *		that in kernel and nbanks of struct will be updated.
584 	 *	EFAULT: if other errors in kernel.
585 	 */
586 	case MCIOC_SEG:
587 
588 		if (copyin((void *)arg, &mcseg_in,
589 		    sizeof (struct mc_segment)) != 0)
590 			return (EFAULT);
591 
592 		mutex_enter(&mcdatamutex);
593 		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
594 		    seg_head)) == NULL) {
595 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
596 			    "id %d\n", mcseg_in.id));
597 			mutex_exit(&mcdatamutex);
598 			return (EFAULT);
599 		}
600 
601 		if (mcseg_in.nbanks < seg->nbanks) {
602 			mcseg_in.nbanks = seg->nbanks;
603 			if (copyout(&mcseg_in, (void *)arg,
604 			    sizeof (struct mc_segment)))
605 				status = EFAULT;
606 			else
607 				status = EINVAL;
608 
609 			mutex_exit(&mcdatamutex);
610 			return (status);
611 		}
612 
613 		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
614 		    sizeof (mcseg->bankids[0]);
615 		mcseg = kmem_zalloc(size, KM_SLEEP);
616 
617 		mcseg->id = seg->seg_node.id;
618 		mcseg->ifactor = seg->ifactor;
619 		mcseg->base = seg->base;
620 		mcseg->size = seg->size;
621 		mcseg->nbanks = seg->nbanks;
622 
623 		bank = seg->hb_inseg;
624 
625 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
626 		    seg->nbanks, (void *)seg, (void *)bank));
627 
628 		i = 0;
629 		while (bank != NULL) {
630 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
631 			    i, bank->bank_node.id));
632 			mcseg->bankids[i].globalid = bank->bank_node.id;
633 			mcseg->bankids[i++].localid =
634 			    bank->local_id;
635 			bank = bank->n_inseg;
636 		}
637 		ASSERT(i == seg->nbanks);
638 		mutex_exit(&mcdatamutex);
639 
640 		if (copyout(mcseg, (void *)arg, size))
641 			status = EFAULT;
642 
643 		kmem_free(mcseg, size);
644 		return (status);
645 
646 	/*
647 	 * input: id
648 	 *
649 	 * return    0: mask, match, size, and devgrpid,
650 	 *		where global id is unique of all devgrps and local id
651 	 *		is only unique for mc.
652 	 *	EINVAL: if id isn't found
653 	 *	EFAULT: if other errors in kernel.
654 	 */
655 	case MCIOC_BANK:
656 		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
657 			return (EFAULT);
658 
659 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
660 
661 		mutex_enter(&mcdatamutex);
662 
663 		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
664 		    bank_head)) == NULL) {
665 			mutex_exit(&mcdatamutex);
666 			return (EINVAL);
667 		}
668 
669 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
670 		    bank->bank_node.id, (void *)bank, bank->valid));
671 
672 		/*
673 		 * If (Physic Address & MASK) == MATCH, Physic Address is
674 		 * located at this bank. The lower physical address bits
675 		 * are at [9-6].
676 		 */
677 		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
678 		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
679 		mcbank.match = bank->lm << MADR_LPA_SHIFT;
680 		mcbank.size = bank->size;
681 		mcbank.devgrpid.globalid = bank->devgrp_id;
682 		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;
683 
684 		mutex_exit(&mcdatamutex);
685 
686 		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
687 			return (EFAULT);
688 		return (0);
689 
690 	/*
691 	 * input:id and allocate space for various length of deviceids
692 	 *
693 	 * return    0: size and number of devices.
694 	 *	EINVAL: id isn't found
695 	 *	EFAULT: if other errors in kernel.
696 	 */
697 	case MCIOC_DEVGRP:
698 
699 		if (copyin((void *)arg, &mcdevgrp,
700 		    sizeof (struct mc_devgrp)) != 0)
701 			return (EFAULT);
702 
703 		mutex_enter(&mcdatamutex);
704 		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
705 		    dgrp_head)) == NULL) {
706 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
707 			    "%d\n", mcdevgrp.id));
708 			mutex_exit(&mcdatamutex);
709 			return (EINVAL);
710 		}
711 
712 		mcdevgrp.ndevices = dgrp->ndevices;
713 		mcdevgrp.size = dgrp->size;
714 
715 		mutex_exit(&mcdatamutex);
716 
717 		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
718 			status = EFAULT;
719 
720 		return (status);
721 
722 	/*
723 	 * input: nmcs and allocate space for various length of mcids
724 	 *
725 	 * return    0: number of mc, and all mcids,
726 	 *		where glocal and local ids are identical.
727 	 *	EINVAL: if the given nmcs is less than that in kernel and
728 	 *		nmcs of struct will be updated.
729 	 *	EFAULT: if other errors in kernel.
730 	 */
731 	case MCIOC_CTRLCONF:
732 		if (copyin((void *)arg, &mcctrlconf_in,
733 		    sizeof (struct mc_ctrlconf)) != 0)
734 			return (EFAULT);
735 
736 		mutex_enter(&mcdatamutex);
737 		if (mcctrlconf_in.nmcs < nmcs) {
738 			mcctrlconf_in.nmcs = nmcs;
739 			if (copyout(&mcctrlconf_in, (void *)arg,
740 			    sizeof (struct mc_ctrlconf)))
741 				status = EFAULT;
742 			else
743 				status = EINVAL;
744 
745 			mutex_exit(&mcdatamutex);
746 			return (status);
747 		}
748 
749 		/*
750 		 * Cannot just use the size of the struct because of the various
751 		 * length struct
752 		 */
753 		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
754 		    sizeof (mcctrlconf->mcids[0]));
755 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
756 
757 		mcctrlconf->nmcs = nmcs;
758 
759 		/* Get all MC ids and add to mcctrlconf */
760 		mctrl = mctrl_head;
761 		i = 0;
762 		while (mctrl != NULL) {
763 			mcctrlconf->mcids[i].globalid = mctrl->id;
764 			mcctrlconf->mcids[i].localid = mctrl->id;
765 			i++;
766 			mctrl = mctrl->next;
767 		}
768 		ASSERT(i == nmcs);
769 
770 		mutex_exit(&mcdatamutex);
771 
772 		if (copyout(mcctrlconf, (void *)arg, size))
773 			status = EFAULT;
774 
775 		kmem_free(mcctrlconf, size);
776 		return (status);
777 
778 	/*
779 	 * input:id, ndevgrps and allocate space for various length of devgrpids
780 	 *
781 	 * return    0: number of devgrp, and all devgrpids,
782 	 *		is unique of all devgrps and local id is only unique
783 	 *		for mc.
784 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
785 	 *		less than that in kernel and ndevgrps of struct will
786 	 *		be updated.
787 	 *	EFAULT: if other errors in kernel.
788 	 */
789 	case MCIOC_CONTROL:
790 		if (copyin((void *)arg, &mccontrol_in,
791 		    sizeof (struct mc_control)) != 0)
792 			return (EFAULT);
793 
794 		mutex_enter(&mcdatamutex);
795 		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
796 		    mctrl_head)) == NULL) {
797 			mutex_exit(&mcdatamutex);
798 			return (EINVAL);
799 		}
800 
801 		/*
802 		 * mcport->ndevgrps zero means Memory Controller is disable.
803 		 */
804 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
805 		    (mcport->ndevgrps == 0)) {
806 			mccontrol_in.ndevgrps = mcport->ndevgrps;
807 			if (copyout(&mccontrol_in, (void *)arg,
808 			    sizeof (struct mc_control)))
809 				status = EFAULT;
810 			else if (mcport->ndevgrps != 0)
811 				status = EINVAL;
812 
813 			mutex_exit(&mcdatamutex);
814 			return (status);
815 		}
816 
817 		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
818 		    sizeof (mccontrol->devgrpids[0]);
819 		mccontrol = kmem_zalloc(size, KM_SLEEP);
820 
821 		mccontrol->id = mcport->mctrl_node.id;
822 		mccontrol->ndevgrps = mcport->ndevgrps;
823 		for (i = 0; i < mcport->ndevgrps; i++) {
824 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
825 			mccontrol->devgrpids[i].localid =
826 			    mcport->devgrpids[i] % NDGRPS;
827 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
828 			    *(uint64_t *)&mccontrol->devgrpids[i]));
829 		}
830 		mutex_exit(&mcdatamutex);
831 
832 		if (copyout(mccontrol, (void *)arg, size))
833 			status = EFAULT;
834 
835 		kmem_free(mccontrol, size);
836 		return (status);
837 
838 	/*
839 	 * input:id
840 	 *
841 	 * return    0: CPU flushed successfully.
842 	 *	EINVAL: the id wasn't found
843 	 */
844 	case MCIOC_ECFLUSH:
845 		mutex_enter(&cpu_lock);
846 		cpu = cpu_get((processorid_t)arg);
847 		mutex_exit(&cpu_lock);
848 		if (cpu == NULL)
849 			return (EINVAL);
850 
851 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
852 
853 		return (0);
854 
855 	default:
856 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
857 		return (EFAULT);
858 	}
859 }
860 
861 /*
862  * Get Memory Address Decoding Registers and construct list.
863  * flag is to workaround Cheetah's restriction where register cannot be mapped
864  * if port id(MC registers on it) == cpu id(process is running on it).
865  */
866 static int
867 mc_get_mcregs(struct mc_soft_state *softsp)
868 {
869 	int i;
870 	int err = 0;
871 	uint64_t madreg;
872 	uint64_t ma_reg_array[NBANKS];	/* there are NBANKS of madrs */
873 
874 	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
875 	mc_construct(softsp->portid, softsp->memlayoutp);
876 
877 	/*
878 	 * If memlayoutp is NULL, the Memory Controller is disable, and
879 	 * doesn't need to create any bank and segment.
880 	 */
881 	if (softsp->memlayoutp == NULL)
882 		goto exit;
883 
884 	/*
885 	 * Get the content of 4 Memory Address Decoding Registers, and
886 	 * construct lists of logical banks and segments.
887 	 */
888 	for (i = 0; i < NBANKS; i++) {
889 		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
890 		    "cpu=%d\n", (void *)softsp->mc_base, softsp->portid,
891 		    CPU->cpu_id));
892 
893 		kpreempt_disable();
894 		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
895 			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
896 		else
897 			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
898 			    (i * REGOFFSET)));
899 		kpreempt_enable();
900 
901 		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
902 		    "reg=0x%lx\n", softsp->memlayoutp, madreg));
903 
904 		ma_reg_array[i] = madreg;
905 
906 		if ((err = mlayout_add(softsp->portid, i, madreg,
907 		    softsp->memlayoutp)) == -1)
908 			break;
909 	}
910 
911 	/*
912 	 * Create the logical bank property for this mc node. This
913 	 * property is an encoded array of the madr for each logical
914 	 * bank (there are NBANKS of these).
915 	 */
916 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
917 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
918 	    MEM_CFG_PROP_NAME) != 1) {
919 		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
920 		    DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
921 		    (caddr_t)&ma_reg_array, sizeof (ma_reg_array));
922 	}
923 
924 exit:
925 	if (!err) {
926 		mutex_enter(&mcdatamutex);
927 		nmcs++;
928 		mutex_exit(&mcdatamutex);
929 	}
930 	return (err);
931 }
932 
933 /*
934  * Translate a <DIMM, offset> pair to a physical address.
935  */
936 static int
937 mc_offset_to_addr(struct seg_info *seg,
938     struct bank_info *bank, uint64_t off, uint64_t *addr)
939 {
940 	uint64_t base, size, line, remainder;
941 	uint32_t ifactor;
942 
943 	/*
944 	 * Compute the half-dimm size in bytes.
945 	 * Note that bank->size represents the number of data bytes,
946 	 * and does not include the additional bits used for ecc, mtag,
947 	 * and mtag ecc information in each 144-bit checkword.
948 	 * For calculating the offset to a checkword we need the size
949 	 * including the additional 8 bytes for each 64 data bytes of
950 	 * a cache line.
951 	 */
952 	size = ((bank->size / 4) / 64) * 72;
953 
954 	/*
955 	 * Check if the offset is within this bank. This depends on the position
956 	 * of the bank, i.e., whether it is the front bank or the back bank.
957 	 */
958 	base = size * bank->pos;
959 
960 	if ((off < base) || (off >= (base + size)))
961 		return (-1);
962 
963 	/*
964 	 * Compute the offset within the half-dimm.
965 	 */
966 	off -= base;
967 
968 	/*
969 	 * Compute the line within the half-dimm. This is the same as the line
970 	 * within the bank since each DIMM in a bank contributes uniformly
971 	 * 144 bits (18 bytes) to a cache line.
972 	 */
973 	line = off / QWORD_SIZE_BYTES;
974 
975 	remainder = off % QWORD_SIZE_BYTES;
976 
977 	/*
978 	 * Compute the line within the segment.
979 	 * The bank->lm field indicates the order in which cache lines are
980 	 * distributed across the banks of a segment (See the Cheetah PRM).
981 	 * The interleave factor the bank is programmed with is used instead
982 	 * of the segment interleave factor since a segment can be composed
983 	 * of banks with different interleave factors if the banks are not
984 	 * uniform in size.
985 	 */
986 	ifactor = (bank->lk ^ 0xF) + 1;
987 	line = (line * ifactor) + bank->lm;
988 
989 	/*
990 	 * Compute the physical address assuming that there are 64 data bytes
991 	 * in a cache line.
992 	 */
993 	*addr = (line << 6) + seg->base;
994 	*addr += remainder * 16;
995 
996 	return (0);
997 }
998 
999 /*
1000  * Translate a physical address to a <DIMM, offset> pair.
1001  */
1002 static void
1003 mc_addr_to_offset(struct seg_info *seg,
1004     struct bank_info *bank, uint64_t addr, uint64_t *off)
1005 {
1006 	uint64_t base, size, line, remainder;
1007 	uint32_t ifactor;
1008 
1009 	/*
1010 	 * Compute the line within the segment assuming that there are 64 data
1011 	 * bytes in a cache line.
1012 	 */
1013 	line = (addr - seg->base) / 64;
1014 
1015 	/*
1016 	 * The lm (lower match) field from the Memory Address Decoding Register
1017 	 * for this bank determines which lines within a memory segment this
1018 	 * bank should respond to.  These are the actual address bits the
1019 	 * interleave is done over (See the Cheetah PRM).
1020 	 * In other words, the lm field indicates the order in which the cache
1021 	 * lines are distributed across the banks of a segment, and thusly it
1022 	 * can be used to compute the line within this bank. This is the same as
1023 	 * the line within the half-dimm. This is because each DIMM in a bank
1024 	 * contributes uniformly to every cache line.
1025 	 */
1026 	ifactor = (bank->lk ^ 0xF) + 1;
1027 	line = (line - bank->lm)/ifactor;
1028 
1029 	/*
1030 	 * Compute the offset within the half-dimm. This depends on whether
1031 	 * or not the bank is a front logical bank or a back logical bank.
1032 	 */
1033 	*off = line * QWORD_SIZE_BYTES;
1034 
1035 	/*
1036 	 * Compute the half-dimm size in bytes.
1037 	 * Note that bank->size represents the number of data bytes,
1038 	 * and does not include the additional bits used for ecc, mtag,
1039 	 * and mtag ecc information in each 144-bit quadword.
1040 	 * For calculating the offset to a checkword we need the size
1041 	 * including the additional 8 bytes for each 64 data bytes of
1042 	 * a cache line.
1043 	 */
1044 	size = ((bank->size / 4) / 64) * 72;
1045 
1046 	/*
1047 	 * Compute the offset within the dimm to the nearest line. This depends
1048 	 * on whether or not the bank is a front logical bank or a back logical
1049 	 * bank.
1050 	 */
1051 	base = size * bank->pos;
1052 	*off += base;
1053 
1054 	remainder = (addr - seg->base) % 64;
1055 	remainder /= 16;
1056 	*off += remainder;
1057 }
1058 
1059 /*
1060  * A cache line is composed of four quadwords with the associated ECC, the
1061  * MTag along with its associated ECC. This is depicted below:
1062  *
1063  * |                    Data                    |   ECC   | Mtag |MTag ECC|
1064  *  127                                         0 8       0 2    0 3      0
1065  *
1066  * synd_code will be mapped as the following order to mc_get_mem_unum.
1067  *  143                                         16        7      4        0
1068  *
1069  * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
1070  *  575         432 431         288 287         144 143		   0
1071  *
1072  * dimm table: each bit of a cache line needs two bits to represent one of
1073  *      four dimms. So it needs 144 bytes (576 * 2 / 8). The content is in
1074  *      big endian order, i.e. dimm_table[0] represents bits 572 to 575.
1075  *
1076  * pin table: each bit of a cache line needs one byte to represent its pin
1077  *      position, where the max. is 230. So it needs 576 bytes. The table
1078  *      index is the same as the bit position in a cache line, i.e.
1079  *      pin_table[0] represents bit 0, Mtag ECC 0 of Quadword 3.
1080  *
1081  * This is a mapping from syndrome code to QuadWord Logical layout at Safari.
1082  * Referring to Figure 3-4, Excalibur Architecture Manual.
1083  * This table could be moved to cheetah.c if other platform teams agree with
1084  * the bit layout at QuadWord.
1085  */
1086 
static uint8_t qwordmap[] =
{
	/* syndrome codes 0..127: data bits, quadword positions 16..143 */
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
	64, 65, 66, 67, 68, 69, 70, 71,
	72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87,
	88, 89, 90, 91, 92, 93, 94, 95,
	96, 97, 98, 99, 100, 101, 102, 103,
	104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	/* syndrome codes 128..136: ECC bits, positions 7..15 */
	7, 8, 9, 10, 11, 12, 13, 14, 15,
	/* syndrome codes 137..139: Mtag bits, positions 4..6 */
	4, 5, 6,
	/* syndrome codes 140..143: Mtag ECC bits, positions 0..3 */
	0, 1, 2, 3,
};
1099 
1100 
1101 /* ARGSUSED */
1102 static int
1103 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1104 {
1105 	int i, upper_pa, lower_pa, dimmoffset;
1106 	int quadword, pos_cacheline, position, index, idx4dimm;
1107 	int qwlayout = synd_code;
1108 	short offset, data;
1109 	char unum[UNUM_NAMLEN];
1110 	struct dimm_info *dimmp;
1111 	struct pin_info *pinp;
1112 	struct bank_info *bank;
1113 
1114 	/*
1115 	 * Enforce old Openboot requirement for synd code, either a single-bit
1116 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1117 	 */
1118 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1119 		return (EINVAL);
1120 
1121 	unum[0] = '\0';
1122 
1123 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1124 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1125 
1126 	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));
1127 
1128 	/*
1129 	 * Scan all logical banks to get one responding to the physical
1130 	 * address. Then compute the index to look up dimm and pin tables
1131 	 * to generate the unum.
1132 	 */
1133 	mutex_enter(&mcdatamutex);
1134 	bank = (struct bank_info *)bank_head;
1135 	while (bank != NULL) {
1136 		int bankid, mcid, bankno_permc;
1137 
1138 		bankid = bank->bank_node.id;
1139 		bankno_permc = bankid % NBANKS;
1140 		mcid = bankid / NBANKS;
1141 
1142 		/*
1143 		 * The Address Decoding logic decodes the different fields
1144 		 * in the Memory Address Decoding register to determine
1145 		 * whether a particular logical bank should respond to a
1146 		 * physical address.
1147 		 */
1148 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1149 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1150 			bank = (struct bank_info *)bank->bank_node.next;
1151 			continue;
1152 		}
1153 
1154 		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;
1155 
1156 		dimmp = (struct dimm_info *)bank->dimminfop;
1157 		ASSERT(dimmp != NULL);
1158 
1159 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1160 			/*
1161 			 * single-bit error handling, we can identify specific
1162 			 * DIMM.
1163 			 */
1164 
1165 			pinp = (struct pin_info *)&dimmp->data[0];
1166 
1167 			if (!dimmp->sym_flag)
1168 				pinp++;
1169 
1170 			quadword = (paddr & 0x3f) / 16;
1171 			/* or quadword = (paddr >> 4) % 4; */
1172 			pos_cacheline = ((3 - quadword) * QWORD_SIZE) +
1173 			    qwordmap[qwlayout];
1174 			position = 575 - pos_cacheline;
1175 			index = position * 2 / 8;
1176 			offset = position % 4;
1177 
1178 			/*
1179 			 * Trade-off: We couldn't add pin number to
1180 			 * unum string because statistic number
1181 			 * pumps up at the corresponding dimm not pin.
1182 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1183 			 * pinp->pintable[pos_cacheline]);
1184 			 */
1185 			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
1186 			    (uint_t)pinp->pintable[pos_cacheline]));
1187 			data = pinp->dimmtable[index];
1188 			idx4dimm = (data >> ((3 - offset) * 2)) & 3;
1189 
1190 			(void) strncpy(unum,
1191 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1192 			    UNUM_NAMLEN);
1193 			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
1194 			/*
1195 			 * platform hook for adding label information to unum.
1196 			 */
1197 			mc_add_mem_unum_label(unum, mcid, bankno_permc,
1198 			    idx4dimm);
1199 		} else {
1200 			char *p = unum;
1201 			size_t res = UNUM_NAMLEN;
1202 
1203 			/*
1204 			 * multi-bit error handling, we can only identify
1205 			 * bank of DIMMs.
1206 			 */
1207 
1208 			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
1209 				(void) snprintf(p, res, "%s%s",
1210 				    i == 0 ? "" : " ",
1211 				    (char *)dimmp->label[dimmoffset + i]);
1212 				res -= strlen(p);
1213 				p += strlen(p);
1214 			}
1215 
1216 			/*
1217 			 * platform hook for adding label information
1218 			 * to unum.
1219 			 */
1220 			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
1221 		}
1222 		mutex_exit(&mcdatamutex);
1223 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1224 		    (strlen(unum) >= buflen)) {
1225 			return (ENAMETOOLONG);
1226 		} else {
1227 			(void) strncpy(buf, unum, buflen);
1228 			*lenp = strlen(buf);
1229 			return (0);
1230 		}
1231 	}	/* end of while loop for logical bank list */
1232 
1233 	mutex_exit(&mcdatamutex);
1234 	return (ENXIO);
1235 }
1236 
1237 /* ARGSUSED */
1238 static int
1239 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
1240 {
1241 	int upper_pa, lower_pa;
1242 	struct bank_info *bank;
1243 	struct seg_info *seg;
1244 
1245 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1246 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1247 
1248 	/*
1249 	 * Scan all logical banks to get one responding to the physical
1250 	 * address.
1251 	 */
1252 	mutex_enter(&mcdatamutex);
1253 	bank = (struct bank_info *)bank_head;
1254 	while (bank != NULL) {
1255 		/*
1256 		 * The Address Decoding logic decodes the different fields
1257 		 * in the Memory Address Decoding register to determine
1258 		 * whether a particular logical bank should respond to a
1259 		 * physical address.
1260 		 */
1261 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1262 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1263 			bank = (struct bank_info *)bank->bank_node.next;
1264 			continue;
1265 		}
1266 
1267 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1268 		ASSERT(seg != NULL);
1269 		ASSERT(paddr >= seg->base);
1270 
1271 		mc_addr_to_offset(seg, bank, paddr, offp);
1272 
1273 		mutex_exit(&mcdatamutex);
1274 		return (0);
1275 	}
1276 
1277 	mutex_exit(&mcdatamutex);
1278 	return (ENXIO);
1279 }
1280 
1281 /*
1282  * Translate a DIMM <id, offset> pair to a physical address.
1283  */
1284 static int
1285 mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr)
1286 {
1287 	struct seg_info *seg;
1288 	struct bank_info *bank;
1289 	int first_seg_id;
1290 	int i, found;
1291 
1292 	ASSERT(sid != NULL);
1293 
1294 	mutex_enter(&mcdatamutex);
1295 
1296 	rw_enter(&mcdimmsids_rw, RW_READER);
1297 
1298 	/*
1299 	 * If DIMM serial ids have not been cached yet, tell the
1300 	 * caller to try again.
1301 	 */
1302 	if (mc_dimm_sids == NULL) {
1303 		rw_exit(&mcdimmsids_rw);
1304 		return (EAGAIN);
1305 	}
1306 
1307 	for (i = 0; i < max_entries; i++) {
1308 		if (mc_dimm_sids[i].mcid == mcid)
1309 			break;
1310 	}
1311 
1312 	if (i == max_entries) {
1313 		rw_exit(&mcdimmsids_rw);
1314 		mutex_exit(&mcdatamutex);
1315 		return (ENODEV);
1316 	}
1317 
1318 	first_seg_id = mc_dimm_sids[i].seg_id;
1319 
1320 	seg = (struct seg_info *)mc_node_get(first_seg_id, seg_head);
1321 
1322 	rw_exit(&mcdimmsids_rw);
1323 
1324 	if (seg == NULL) {
1325 		mutex_exit(&mcdatamutex);
1326 		return (ENODEV);
1327 	}
1328 
1329 	found = 0;
1330 
1331 	for (bank = seg->hb_inseg; bank; bank = bank->n_inseg) {
1332 		ASSERT(bank->valid);
1333 
1334 		for (i = 0; i < NDIMMS; i++) {
1335 			if (strncmp((char *)bank->dimmsidp[i], sid,
1336 			    DIMM_SERIAL_ID_LEN)  == 0)
1337 				break;
1338 		}
1339 
1340 		if (i == NDIMMS)
1341 			continue;
1342 
1343 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1)
1344 			continue;
1345 		found = 1;
1346 		break;
1347 	}
1348 
1349 	if (found) {
1350 		mutex_exit(&mcdatamutex);
1351 		return (0);
1352 	}
1353 
1354 	/*
1355 	 * If a bank wasn't found, it may be in another segment.
1356 	 * This can happen if the different logical banks of an MC
1357 	 * have different interleave factors.  To deal with this
1358 	 * possibility, we'll do a brute-force search for banks
1359 	 * for this MC with a different seg id then above.
1360 	 */
1361 	bank = (struct bank_info *)bank_head;
1362 	while (bank != NULL) {
1363 
1364 		if (!bank->valid) {
1365 			bank = (struct bank_info *)bank->bank_node.next;
1366 			continue;
1367 		}
1368 
1369 		if (bank->bank_node.id / NBANKS != mcid) {
1370 			bank = (struct bank_info *)bank->bank_node.next;
1371 			continue;
1372 		}
1373 
1374 		/* Ignore banks in the segment we looked in above. */
1375 		if (bank->seg_id == mc_dimm_sids[i].seg_id) {
1376 			bank = (struct bank_info *)bank->bank_node.next;
1377 			continue;
1378 		}
1379 
1380 		for (i = 0; i < NDIMMS; i++) {
1381 			if (strncmp((char *)bank->dimmsidp[i], sid,
1382 			    DIMM_SERIAL_ID_LEN)  == 0)
1383 				break;
1384 		}
1385 
1386 		if (i == NDIMMS) {
1387 			bank = (struct bank_info *)bank->bank_node.next;
1388 			continue;
1389 		}
1390 
1391 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1392 
1393 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1) {
1394 			bank = (struct bank_info *)bank->bank_node.next;
1395 			continue;
1396 		}
1397 
1398 		found = 1;
1399 		break;
1400 	}
1401 
1402 	mutex_exit(&mcdatamutex);
1403 
1404 	if (found)
1405 		return (0);
1406 	else
1407 		return (ENOENT);
1408 }
1409 
1410 static int
1411 mc_get_mem_info(int synd_code, uint64_t paddr,
1412     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1413     int *segsp, int *banksp, int *mcidp)
1414 {
1415 	int upper_pa, lower_pa;
1416 	struct bank_info *bankp;
1417 
1418 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1419 		return (EINVAL);
1420 
1421 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1422 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1423 
1424 	/*
1425 	 * Scan all logical banks to get one responding to the physical
1426 	 * address.
1427 	 */
1428 	mutex_enter(&mcdatamutex);
1429 	bankp = (struct bank_info *)bank_head;
1430 	while (bankp != NULL) {
1431 		struct seg_info *segp;
1432 		int bankid, mcid;
1433 
1434 		bankid = bankp->bank_node.id;
1435 		mcid = bankid / NBANKS;
1436 
1437 		/*
1438 		 * The Address Decoding logic decodes the different fields
1439 		 * in the Memory Address Decoding register to determine
1440 		 * whether a particular logical bank should respond to a
1441 		 * physical address.
1442 		 */
1443 		if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) |
1444 		    bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) {
1445 			bankp = (struct bank_info *)bankp->bank_node.next;
1446 			continue;
1447 		}
1448 
1449 		/*
1450 		 * Get the corresponding segment.
1451 		 */
1452 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1453 		    seg_head)) == NULL) {
1454 			mutex_exit(&mcdatamutex);
1455 			return (EFAULT);
1456 		}
1457 
1458 		*mem_sizep = memsize;
1459 		*seg_sizep = segp->size;
1460 		*bank_sizep = bankp->size;
1461 		*segsp = nsegments;
1462 		*banksp = segp->nbanks;
1463 		*mcidp = mcid;
1464 
1465 		mutex_exit(&mcdatamutex);
1466 
1467 		return (0);
1468 
1469 	}	/* end of while loop for logical bank list */
1470 
1471 	mutex_exit(&mcdatamutex);
1472 	return (ENXIO);
1473 }
1474 
1475 /*
1476  * Construct lists for an enabled MC where size of memory is 0.
1477  * The lists are connected as follows:
1478  * Attached MC -> device group list -> device list(per devgrp).
1479  */
1480 static void
1481 mc_construct(int mc_id, void *dimminfop)
1482 {
1483 	int i, j, idx, dmidx;
1484 	struct mctrl_info *mctrl;
1485 	struct dgrp_info *dgrp;
1486 	struct device_info *dev;
1487 	struct	dimm_info *dimmp = (struct  dimm_info *)dimminfop;
1488 
1489 	mutex_enter(&mcdatamutex);
1490 	/* allocate for mctrl_info and bank_info */
1491 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id,
1492 	    mctrl_head)) != NULL) {
1493 		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
1494 		mutex_exit(&mcdatamutex);
1495 		return;
1496 	}
1497 
1498 	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
1499 
1500 	/*
1501 	 * If dimminfop is NULL, the Memory Controller is disable, and
1502 	 * the number of device group will be zero.
1503 	 */
1504 	if (dimminfop == NULL) {
1505 		mctrl->mctrl_node.id = mc_id;
1506 		mctrl->ndevgrps = 0;
1507 		mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1508 		mutex_exit(&mcdatamutex);
1509 		return;
1510 	}
1511 
1512 	/* add the entry on dgrp_info list */
1513 	for (i = 0; i < NDGRPS; i++) {
1514 		idx = mc_id * NDGRPS + i;
1515 		mctrl->devgrpids[i] = idx;
1516 		if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head))
1517 		    != NULL) {
1518 			cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n",
1519 			    idx);
1520 			continue;
1521 		}
1522 
1523 		dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP);
1524 
1525 		/* add the entry on device_info list */
1526 		for (j = 0; j < NDIMMS; j++) {
1527 			dmidx = idx * NDIMMS + j;
1528 			dgrp->deviceids[j] = dmidx;
1529 			if ((dev = (struct device_info *)
1530 			    mc_node_get(dmidx, device_head)) != NULL) {
1531 				cmn_err(CE_WARN, "mc_construct: device %d "
1532 				    "exists\n", dmidx);
1533 				continue;
1534 			}
1535 			dev = kmem_zalloc(sizeof (struct device_info),
1536 			    KM_SLEEP);
1537 			dev->dev_node.id = dmidx;
1538 			dev->size = 0;
1539 			(void) strncpy(dev->label, (char *)
1540 			    dimmp->label[i * NDIMMS + j], MAX_DEVLEN);
1541 
1542 			mc_node_add((mc_dlist_t *)dev, &device_head,
1543 			    &device_tail);
1544 		}	/* for loop for constructing device_info */
1545 
1546 		dgrp->dgrp_node.id = idx;
1547 		dgrp->ndevices = NDIMMS;
1548 		dgrp->size = 0;
1549 		mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1550 
1551 	}	/* end of for loop for constructing dgrp_info list */
1552 
1553 	mctrl->mctrl_node.id = mc_id;
1554 	mctrl->ndevgrps = NDGRPS;
1555 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1556 	mutex_exit(&mcdatamutex);
1557 }
1558 
1559 /*
1560  * Construct lists for Memory Configuration at logical viewpoint.
1561  *
1562  * Retrieve information from Memory Address Decoding Register and set up
1563  * bank and segment lists. Link bank to its corresponding device group, and
1564  * update size of device group and devices. Also connect bank to the segment.
1565  *
1566  * Memory Address Decoding Register
1567  * -------------------------------------------------------------------------
1568  * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
1569  * |-----------|----------|------|---------|-----|------|-----|-----|-------|
1570  * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
1571  * -------------------------------------------------------------------------
1572  *
1573  */
1574 
1575 static int
1576 mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
1577 {
1578 	int i, dmidx, idx;
1579 	uint32_t ifactor;
1580 	int status = 0;
1581 	uint64_t size, base;
1582 	struct seg_info *seg_curr;
1583 	struct bank_info *bank_curr;
1584 	struct dgrp_info *dgrp;
1585 	struct device_info *dev;
1586 	union {
1587 		struct {
1588 			uint64_t valid	: 1;
1589 			uint64_t resrv1	: 10;
1590 			uint64_t uk	: 12;
1591 			uint64_t resrv2	: 4;
1592 			uint64_t um	: 17;
1593 			uint64_t resrv3	: 2;
1594 			uint64_t lk	: 4;
1595 			uint64_t resrv4	: 2;
1596 			uint64_t lm	: 4;
1597 			uint64_t resrv5	: 8;
1598 		} _s;
1599 		uint64_t madreg;
1600 	} mcreg;
1601 
1602 	mcreg.madreg = reg;
1603 
1604 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
1605 	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));
1606 
1607 	/* add the entry on bank_info list */
1608 	idx = mc_id * NBANKS + bank_no;
1609 
1610 	mutex_enter(&mcdatamutex);
1611 	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
1612 	    != NULL) {
1613 		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
1614 		goto exit;
1615 	}
1616 
1617 	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
1618 	bank_curr->bank_node.id = idx;
1619 	bank_curr->valid = mcreg._s.valid;
1620 	bank_curr->dimminfop = dimminfop;
1621 
1622 	if (!mcreg._s.valid) {
1623 		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1624 		goto exit;
1625 	}
1626 
1627 	/*
1628 	 * size of a logical bank = size of segment / interleave factor
1629 	 * This fomula is not only working for regular configuration,
1630 	 * i.e. number of banks at a segment equals to the max
1631 	 * interleave factor, but also for special case, say 3 bank
1632 	 * interleave. One bank is 2 way interleave and other two are
1633 	 * 4 way. So the sizes of banks are size of segment/2 and /4
1634 	 * respectively.
1635 	 */
1636 	ifactor = (mcreg._s.lk ^ 0xF) + 1;
1637 	size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
1638 	base = mcreg._s.um & ~mcreg._s.uk;
1639 	base <<= MADR_UPA_SHIFT;
1640 
1641 	bank_curr->uk = mcreg._s.uk;
1642 	bank_curr->um = mcreg._s.um;
1643 	bank_curr->lk = mcreg._s.lk;
1644 	bank_curr->lm = mcreg._s.lm;
1645 	bank_curr->size = size;
1646 
1647 	/*
1648 	 * The bank's position depends on which halves of the DIMMs it consists
1649 	 * of. The front-side halves of the 4 DIMMs constitute the front bank
1650 	 * and the back-side halves constitute the back bank. Bank numbers
1651 	 * 0 and 1 are front-side banks and bank numbers 2 and 3 are back side
1652 	 * banks.
1653 	 */
1654 	bank_curr->pos = bank_no >> 1;
1655 	ASSERT((bank_curr->pos == 0) || (bank_curr->pos == 1));
1656 
1657 	/*
1658 	 * Workaround to keep gcc and SS12 lint happy.
1659 	 * Lint expects lk, uk and um in the format statement below
1660 	 * to use %lx, but this produces a warning when compiled with
1661 	 * gcc.
1662 	 */
1663 
1664 #if defined(lint)
1665 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1666 	    "lk 0x%lx uk 0x%lx um 0x%lx ifactor 0x%x size 0x%lx base 0x%lx\n",
1667 	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1668 #else /* lint */
1669 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1670 	    "lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
1671 	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1672 #endif /* lint */
1673 
1674 	/* connect the entry and update the size on dgrp_info list */
1675 	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
1676 	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
1677 		/* all avaiable dgrp should be linked at mc_construct */
1678 		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
1679 		kmem_free(bank_curr, sizeof (struct bank_info));
1680 		status = -1;
1681 		goto exit;
1682 	}
1683 
1684 	bank_curr->devgrp_id = idx;
1685 	dgrp->size += size;
1686 
1687 	/* Update the size of entry on device_info list */
1688 	for (i = 0; i < NDIMMS; i++) {
1689 		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
1690 		dgrp->deviceids[i] = dmidx;
1691 
1692 		/* avaiable device should be linked at mc_construct */
1693 		if ((dev = (struct device_info *)mc_node_get(dmidx,
1694 		    device_head)) == NULL) {
1695 			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
1696 			    dmidx);
1697 			kmem_free(bank_curr, sizeof (struct bank_info));
1698 			status = -1;
1699 			goto exit;
1700 		}
1701 
1702 		dev->size += (size / NDIMMS);
1703 
1704 		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
1705 		    dmidx, size));
1706 	}
1707 
1708 	/*
1709 	 * Get the segment by matching the base address, link this bank
1710 	 * to the segment. If not matched, allocate a new segment and
1711 	 * add it at segment list.
1712 	 */
1713 	if (seg_curr = seg_match_base(base)) {
1714 		seg_curr->nbanks++;
1715 		seg_curr->size += size;
1716 		if (ifactor > seg_curr->ifactor)
1717 			seg_curr->ifactor = ifactor;
1718 		bank_curr->seg_id = seg_curr->seg_node.id;
1719 	} else {
1720 		seg_curr = (struct seg_info *)
1721 		    kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
1722 		bank_curr->seg_id = seg_id;
1723 		seg_curr->seg_node.id = seg_id++;
1724 		seg_curr->base = base;
1725 		seg_curr->size = size;
1726 		seg_curr->nbanks = 1;
1727 		seg_curr->ifactor = ifactor;
1728 		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);
1729 
1730 		nsegments++;
1731 	}
1732 
1733 	/* Get the local id of bank which is only unique per segment. */
1734 	bank_curr->local_id = seg_curr->nbanks - 1;
1735 
1736 	/* add bank at the end of the list; not sorted by bankid */
1737 	if (seg_curr->hb_inseg != NULL) {
1738 		bank_curr->p_inseg = seg_curr->tb_inseg;
1739 		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
1740 		seg_curr->tb_inseg->n_inseg = bank_curr;
1741 		seg_curr->tb_inseg = bank_curr;
1742 	} else {
1743 		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
1744 		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
1745 	}
1746 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
1747 	    seg_curr->seg_node.id));
1748 
1749 	if (mc_dimm_sids) {
1750 		rw_enter(&mcdimmsids_rw, RW_WRITER);
1751 		mc_update_bank(bank_curr);
1752 		rw_exit(&mcdimmsids_rw);
1753 	}
1754 	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1755 
1756 	memsize += size;
1757 	if (seg_curr->nbanks > maxbanks)
1758 		maxbanks = seg_curr->nbanks;
1759 
1760 exit:
1761 	mutex_exit(&mcdatamutex);
1762 	return (status);
1763 }
1764 
1765 /*
1766  * Delete nodes related to the given MC on mc, device group, device,
1767  * and bank lists. Moreover, delete corresponding segment if its connected
1768  * banks are all removed.
1769  *
1770  * The "delete" argument is 1 if this is called as a result of DDI_DETACH. In
1771  * this case, the DIMM data structures need to be deleted. The argument is
1772  * 0 if this called as a result of DDI_SUSPEND/DDI_RESUME. In this case,
1773  * the DIMM data structures are left alone.
1774  */
1775 static void
1776 mlayout_del(int mc_id, int delete)
1777 {
1778 	int i, j, dgrpid, devid, bankid, ndevgrps;
1779 	struct seg_info *seg;
1780 	struct bank_info *bank_curr;
1781 	struct mctrl_info *mctrl;
1782 	mc_dlist_t *dgrp_ptr;
1783 	mc_dlist_t *dev_ptr;
1784 	uint64_t base;
1785 
1786 	mutex_enter(&mcdatamutex);
1787 
1788 	/* delete mctrl_info */
1789 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
1790 	    NULL) {
1791 		ndevgrps = mctrl->ndevgrps;
1792 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1793 		kmem_free(mctrl, sizeof (struct mctrl_info));
1794 		nmcs--;
1795 
1796 		/*
1797 		 * There is no other list left for disabled MC.
1798 		 */
1799 		if (ndevgrps == 0) {
1800 			mutex_exit(&mcdatamutex);
1801 			return;
1802 		}
1803 	} else
1804 		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");
1805 
1806 	/* Delete device groups and devices of the detached MC */
1807 	for (i = 0; i < NDGRPS; i++) {
1808 		dgrpid = mc_id * NDGRPS + i;
1809 		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
1810 			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
1811 			continue;
1812 		}
1813 
1814 		for (j = 0; j < NDIMMS; j++) {
1815 			devid = dgrpid * NDIMMS + j;
1816 			if (dev_ptr = mc_node_get(devid, device_head)) {
1817 				mc_node_del(dev_ptr, &device_head,
1818 				    &device_tail);
1819 				kmem_free(dev_ptr, sizeof (struct device_info));
1820 			} else {
1821 				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
1822 				    devid);
1823 			}
1824 		}
1825 
1826 		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
1827 		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
1828 	}
1829 
1830 	/* Delete banks and segments if it has no bank */
1831 	for (i = 0; i < NBANKS; i++) {
1832 		bankid = mc_id * NBANKS + i;
1833 		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
1834 		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
1835 		    bank_head))) {
1836 			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
1837 			continue;
1838 		}
1839 
1840 		if (bank_curr->valid) {
1841 			base = bank_curr->um & ~bank_curr->uk;
1842 			base <<= MADR_UPA_SHIFT;
1843 			bank_curr->valid = 0;
1844 			memsize -= bank_curr->size;
1845 
1846 			/* Delete bank at segment and segment if no bank left */
1847 			if (!(seg = seg_match_base(base))) {
1848 				cmn_err(CE_WARN, "mlayout_del: no seg\n");
1849 				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
1850 				    &bank_tail);
1851 				kmem_free(bank_curr, sizeof (struct bank_info));
1852 				continue;
1853 			}
1854 
1855 			/* update the bank list at the segment */
1856 			if (bank_curr->n_inseg == NULL) {
1857 				/* node is at the tail of list */
1858 				seg->tb_inseg = bank_curr->p_inseg;
1859 			} else {
1860 				bank_curr->n_inseg->p_inseg =
1861 				    bank_curr->p_inseg;
1862 			}
1863 
1864 			if (bank_curr->p_inseg == NULL) {
1865 				/* node is at the head of list */
1866 				seg->hb_inseg = bank_curr->n_inseg;
1867 			} else {
1868 				bank_curr->p_inseg->n_inseg =
1869 				    bank_curr->n_inseg;
1870 			}
1871 
1872 			seg->nbanks--;
1873 			seg->size -= bank_curr->size;
1874 
1875 			if (seg->nbanks == 0) {
1876 				mc_node_del((mc_dlist_t *)seg, &seg_head,
1877 				    &seg_tail);
1878 				kmem_free(seg, sizeof (struct seg_info));
1879 				nsegments--;
1880 			}
1881 
1882 		}
1883 		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1884 		kmem_free(bank_curr, sizeof (struct bank_info));
1885 	}	/* end of for loop for four banks */
1886 
1887 	if (mc_dimm_sids && delete) {
1888 		rw_enter(&mcdimmsids_rw, RW_WRITER);
1889 		i = mc_get_sid_cache_index(mc_id);
1890 		if (i >= 0) {
1891 			mc_dimm_sids[i].state = MC_DIMM_SIDS_INVALID;
1892 			if (mc_dimm_sids[i].sids) {
1893 				kmem_free(mc_dimm_sids[i].sids,
1894 				    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS));
1895 				mc_dimm_sids[i].sids = NULL;
1896 			}
1897 		}
1898 		rw_exit(&mcdimmsids_rw);
1899 	}
1900 
1901 	mutex_exit(&mcdatamutex);
1902 }
1903 
1904 /*
1905  * Search the segment in the list starting at seg_head by base address
1906  * input: base address
1907  * return: pointer of found segment or null if not found.
1908  */
1909 static struct seg_info *
1910 seg_match_base(u_longlong_t base)
1911 {
1912 	static struct seg_info *seg_ptr;
1913 
1914 	seg_ptr = (struct seg_info *)seg_head;
1915 	while (seg_ptr != NULL) {
1916 		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
1917 		    seg_ptr->base, base));
1918 		if (seg_ptr->base == base)
1919 			break;
1920 		seg_ptr = (struct seg_info *)seg_ptr->seg_node.next;
1921 	}
1922 	return (seg_ptr);
1923 }
1924 
1925 /*
1926  * mc_dlist is a double linking list, including unique id, and pointers to
1927  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1928  * and mctrl_info has it at the top to share the operations, add, del, and get.
1929  *
1930  * The new node is added at the tail and is not sorted.
1931  *
1932  * Input: The pointer of node to be added, head and tail of the list
1933  */
1934 
1935 static void
1936 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1937 {
1938 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1939 	    node->id, (void *)*head, (void *)*tail));
1940 
1941 	if (*head != NULL) {
1942 		node->prev = *tail;
1943 		node->next = (*tail)->next;
1944 		(*tail)->next = node;
1945 		*tail = node;
1946 	} else {
1947 		node->next = node->prev = NULL;
1948 		*head = *tail = node;
1949 	}
1950 }
1951 
1952 /*
1953  * Input: The pointer of node to be deleted, head and tail of the list
1954  *
1955  * Deleted node will be at the following positions
1956  * 1. At the tail of the list
1957  * 2. At the head of the list
1958  * 3. At the head and tail of the list, i.e. only one left.
1959  * 4. At the middle of the list
1960  */
1961 
1962 static void
1963 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1964 {
1965 	if (node->next == NULL) {
1966 		/* deleted node is at the tail of list */
1967 		*tail = node->prev;
1968 	} else {
1969 		node->next->prev = node->prev;
1970 	}
1971 
1972 	if (node->prev == NULL) {
1973 		/* deleted node is at the head of list */
1974 		*head = node->next;
1975 	} else {
1976 		node->prev->next = node->next;
1977 	}
1978 }
1979 
1980 /*
1981  * Search the list from the head of the list to match the given id
1982  * Input: id and the head of the list
1983  * Return: pointer of found node
1984  */
1985 static mc_dlist_t *
1986 mc_node_get(int id, mc_dlist_t *head)
1987 {
1988 	mc_dlist_t *node;
1989 
1990 	node = head;
1991 	while (node != NULL) {
1992 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1993 		    node->id, id));
1994 		if (node->id == id)
1995 			break;
1996 		node = node->next;
1997 	}
1998 	return (node);
1999 }
2000 
2001 /*
2002  * mc-us3 driver allows a platform to add extra label
2003  * information to the unum string. If a platform implements a
2004  * kernel function called plat_add_mem_unum_label() it will be
2005  * executed. This would typically be implemented in the platmod.
2006  */
2007 static void
2008 mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
2009 {
2010 	if (&plat_add_mem_unum_label)
2011 		plat_add_mem_unum_label(buf, mcid, bank, dimm);
2012 }
2013 
2014 static int
2015 mc_get_sid_cache_index(int mcid)
2016 {
2017 	int	i;
2018 
2019 	for (i = 0; i < max_entries; i++) {
2020 		if (mcid == mc_dimm_sids[i].mcid)
2021 			return (i);
2022 	}
2023 
2024 	return (-1);
2025 }
2026 
2027 static void
2028 mc_update_bank(struct bank_info *bank)
2029 {
2030 	int i, j;
2031 	int bankid, mcid, dgrp_no;
2032 
2033 	/*
2034 	 * Mark the MC if DIMM sids are not available.
2035 	 * Mark which segment the DIMMs belong to.  Allocate
2036 	 * space to store DIMM serial ids which are later
2037 	 * provided by the platform layer, and update the bank_info
2038 	 * structure with pointers to its serial ids.
2039 	 */
2040 	bankid = bank->bank_node.id;
2041 	mcid = bankid / NBANKS;
2042 	i = mc_get_sid_cache_index(mcid);
2043 	if (mc_dimm_sids[i].state == MC_DIMM_SIDS_INVALID)
2044 		mc_dimm_sids[i].state = MC_DIMM_SIDS_REQUESTED;
2045 
2046 	mc_dimm_sids[i].seg_id = bank->seg_id;
2047 
2048 	if (mc_dimm_sids[i].sids == NULL) {
2049 		mc_dimm_sids[i].sids = (dimm_sid_t *)kmem_zalloc(
2050 		    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS), KM_SLEEP);
2051 	}
2052 
2053 	dgrp_no = bank->devgrp_id % NDGRPS;
2054 
2055 	for (j = 0; j < NDIMMS; j++) {
2056 		bank->dimmsidp[j] =
2057 		    &mc_dimm_sids[i].sids[j + (NDIMMS * dgrp_no)];
2058 	}
2059 }
2060 
2061 static int
2062 mc_populate_sid_cache(void)
2063 {
2064 	struct bank_info	*bank;
2065 
2066 	if (&plat_populate_sid_cache == 0)
2067 		return (ENOTSUP);
2068 
2069 	ASSERT(RW_WRITE_HELD(&mcdimmsids_rw));
2070 
2071 	bank = (struct bank_info *)bank_head;
2072 	while (bank != NULL) {
2073 		if (!bank->valid) {
2074 			bank = (struct bank_info *)bank->bank_node.next;
2075 			continue;
2076 		}
2077 
2078 		mc_update_bank(bank);
2079 
2080 		bank = (struct bank_info *)bank->bank_node.next;
2081 	}
2082 
2083 
2084 	/*
2085 	 * Call to the platform layer to populate the cache
2086 	 * with DIMM serial ids.
2087 	 */
2088 	return (plat_populate_sid_cache(mc_dimm_sids, max_entries));
2089 }
2090 
2091 static void
2092 mc_init_sid_cache_thr(void)
2093 {
2094 	ASSERT(mc_dimm_sids == NULL);
2095 
2096 	mutex_enter(&mcdatamutex);
2097 	rw_enter(&mcdimmsids_rw, RW_WRITER);
2098 
2099 	mc_dimm_sids = plat_alloc_sid_cache(&max_entries);
2100 	(void) mc_populate_sid_cache();
2101 
2102 	rw_exit(&mcdimmsids_rw);
2103 	mutex_exit(&mcdatamutex);
2104 }
2105 
2106 static int
2107 mc_init_sid_cache(void)
2108 {
2109 	if (&plat_alloc_sid_cache) {
2110 		(void) thread_create(NULL, 0, mc_init_sid_cache_thr, NULL, 0,
2111 		    &p0, TS_RUN, minclsyspri);
2112 		return (0);
2113 	} else
2114 		return (ENOTSUP);
2115 }
2116 
2117 static int
2118 mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp)
2119 {
2120 	int	i;
2121 
2122 	if (buflen < DIMM_SERIAL_ID_LEN)
2123 		return (ENOSPC);
2124 
2125 	/*
2126 	 * If DIMM serial ids have not been cached yet, tell the
2127 	 * caller to try again.
2128 	 */
2129 	if (!rw_tryenter(&mcdimmsids_rw, RW_READER))
2130 		return (EAGAIN);
2131 
2132 	if (mc_dimm_sids == NULL) {
2133 		rw_exit(&mcdimmsids_rw);
2134 		return (EAGAIN);
2135 	}
2136 
2137 	/*
2138 	 * Find dimm serial id using mcid and dimm #
2139 	 */
2140 	for (i = 0; i < max_entries; i++) {
2141 		if (mc_dimm_sids[i].mcid == mcid)
2142 			break;
2143 	}
2144 	if ((i == max_entries) || (!mc_dimm_sids[i].sids)) {
2145 		rw_exit(&mcdimmsids_rw);
2146 		return (ENOENT);
2147 	}
2148 
2149 	(void) strlcpy(buf, mc_dimm_sids[i].sids[dimm],
2150 	    DIMM_SERIAL_ID_LEN);
2151 	*lenp = strlen(buf);
2152 
2153 	rw_exit(&mcdimmsids_rw);
2154 	return (0);
2155 }
2156