xref: /titanic_52/usr/src/uts/sun4u/io/mc-us3.c (revision 30698f336503439d11933541bc5fac87b3f8b916)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/stat.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/obpdefs.h>
35 #include <sys/cmn_err.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/open.h>
39 #include <sys/thread.h>
40 #include <sys/cpuvar.h>
41 #include <sys/x_call.h>
42 #include <sys/debug.h>
43 #include <sys/sysmacros.h>
44 #include <sys/ivintr.h>
45 #include <sys/intr.h>
46 #include <sys/intreg.h>
47 #include <sys/autoconf.h>
48 #include <sys/modctl.h>
49 #include <sys/spl.h>
50 #include <sys/async.h>
51 #include <sys/mc.h>
52 #include <sys/mc-us3.h>
53 #include <sys/cpu_module.h>
54 #include <sys/platform_module.h>
55 
56 /*
57  * Function prototypes
58  */
59 
60 static int mc_open(dev_t *, int, int, cred_t *);
61 static int mc_close(dev_t, int, int, cred_t *);
62 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
63 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
64 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /*
67  * Configuration data structures
68  */
69 static struct cb_ops mc_cb_ops = {
70 	mc_open,			/* open */
71 	mc_close,			/* close */
72 	nulldev,			/* strategy */
73 	nulldev,			/* print */
74 	nodev,				/* dump */
75 	nulldev,			/* read */
76 	nulldev,			/* write */
77 	mc_ioctl,			/* ioctl */
78 	nodev,				/* devmap */
79 	nodev,				/* mmap */
80 	nodev,				/* segmap */
81 	nochpoll,			/* poll */
82 	ddi_prop_op,			/* cb_prop_op */
83 	0,				/* streamtab */
84 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
85 	CB_REV,				/* rev */
86 	nodev,				/* cb_aread */
87 	nodev				/* cb_awrite */
88 };
89 
90 static struct dev_ops mc_ops = {
91 	DEVO_REV,			/* rev */
92 	0,				/* refcnt  */
93 	ddi_getinfo_1to1,		/* getinfo */
94 	nulldev,			/* identify */
95 	nulldev,			/* probe */
96 	mc_attach,			/* attach */
97 	mc_detach,			/* detach */
98 	nulldev,			/* reset */
99 	&mc_cb_ops,			/* cb_ops */
100 	(struct bus_ops *)0,		/* bus_ops */
101 	nulldev				/* power */
102 };
103 
104 /*
105  * Driver globals
106  */
107 static void *mcp;
108 static int nmcs = 0;
109 static int seg_id = 0;
110 static int nsegments = 0;
111 static uint64_t memsize = 0;
112 static int maxbanks = 0;
113 
114 static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
115 static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
116 static mc_dlist_t *device_head, *device_tail;
117 
118 static kmutex_t	mcmutex;
119 static kmutex_t	mcdatamutex;
120 static int mc_is_open = 0;
121 
122 static krwlock_t mcdimmsids_rw;
123 
124 /* pointer to cache of DIMM serial ids */
125 static dimm_sid_cache_t	*mc_dimm_sids;
126 static int		max_entries;
127 
128 extern struct mod_ops mod_driverops;
129 
130 static struct modldrv modldrv = {
131 	&mod_driverops,			/* module type, this one is a driver */
132 	"Memory-controller: %I%",	/* module name */
133 	&mc_ops,			/* driver ops */
134 };
135 
136 static struct modlinkage modlinkage = {
137 	MODREV_1,		/* rev */
138 	(void *)&modldrv,
139 	NULL
140 };
141 
142 static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
143     int buflen, int *lenp);
144 static int mc_get_mem_info(int synd_code, uint64_t paddr,
145     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
146     int *segsp, int *banksp, int *mcidp);
147 static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp);
148 static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
149 static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr);
150 static int mc_init_sid_cache(void);
151 static int mc_get_mcregs(struct mc_soft_state *);
152 static void mc_construct(int mc_id, void *dimminfop);
153 static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
154 static void mlayout_del(int mc_id, int delete);
155 static struct seg_info *seg_match_base(u_longlong_t base);
156 static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
157 static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
158 static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
159 static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);
160 static int mc_populate_sid_cache(void);
161 static int mc_get_sid_cache_index(int mcid);
162 static void mc_update_bank(struct bank_info *bank);
163 
/*
 * These symbols are weak: the driver tests their addresses before use
 * because they may be absent.
 */
#pragma weak p2get_mem_unum
#pragma weak p2get_mem_info
#pragma weak p2get_mem_sid
#pragma weak p2get_mem_offset
#pragma weak p2get_mem_addr
#pragma weak p2init_sid_cache
#pragma weak plat_add_mem_unum_label
#pragma weak plat_alloc_sid_cache
#pragma weak plat_populate_sid_cache

/* A checkword is 144 bits wide, i.e. 18 bytes. */
#define	QWORD_SIZE		144
#define	QWORD_SIZE_BYTES	(QWORD_SIZE / 8)
176 
177 /*
178  * These are the module initialization routines.
179  */
180 
181 int
182 _init(void)
183 {
184 	int error;
185 
186 	if ((error = ddi_soft_state_init(&mcp,
187 	    sizeof (struct mc_soft_state), 1)) != 0)
188 		return (error);
189 
190 	error =  mod_install(&modlinkage);
191 	if (error == 0) {
192 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
193 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
194 		rw_init(&mcdimmsids_rw, NULL, RW_DRIVER, NULL);
195 	}
196 
197 	return (error);
198 }
199 
200 int
201 _fini(void)
202 {
203 	int error;
204 
205 	if ((error = mod_remove(&modlinkage)) != 0)
206 		return (error);
207 
208 	ddi_soft_state_fini(&mcp);
209 	mutex_destroy(&mcmutex);
210 	mutex_destroy(&mcdatamutex);
211 	rw_destroy(&mcdimmsids_rw);
212 
213 	if (mc_dimm_sids)
214 		kmem_free(mc_dimm_sids, sizeof (dimm_sid_cache_t) *
215 		    max_entries);
216 
217 	return (0);
218 }
219 
220 int
221 _info(struct modinfo *modinfop)
222 {
223 	return (mod_info(&modlinkage, modinfop));
224 }
225 
226 static int
227 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
228 {
229 	struct mc_soft_state *softsp;
230 	struct dimm_info *dimminfop;
231 	int instance, len, err;
232 
233 	/* get the instance of this devi */
234 	instance = ddi_get_instance(devi);
235 
236 	switch (cmd) {
237 	case DDI_ATTACH:
238 		break;
239 
240 	case DDI_RESUME:
241 		/* get the soft state pointer for this device node */
242 		softsp = ddi_get_soft_state(mcp, instance);
243 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
244 		    instance));
245 		/*
246 		 * During resume, the source and target board's bank_infos
247 		 * need to be updated with the new mc MADR values.  This is
248 		 * implemented with existing functionality by first removing
249 		 * the props and allocated data structs, and then adding them
250 		 * back in.
251 		 */
252 		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
253 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
254 		    MEM_CFG_PROP_NAME) == 1) {
255 			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
256 			    MEM_CFG_PROP_NAME);
257 		}
258 		mlayout_del(softsp->portid, 0);
259 		if (mc_get_mcregs(softsp) == -1) {
260 			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
261 			    instance);
262 		}
263 		return (DDI_SUCCESS);
264 
265 	default:
266 		return (DDI_FAILURE);
267 	}
268 
269 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
270 		return (DDI_FAILURE);
271 
272 	softsp = ddi_get_soft_state(mcp, instance);
273 
274 	/* Set the dip in the soft state */
275 	softsp->dip = devi;
276 
277 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
278 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
279 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
280 		    instance, "portid"));
281 		goto bad;
282 	}
283 
284 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
285 	    instance, softsp->portid, CPU->cpu_id));
286 
287 	/* map in the registers for this device. */
288 	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
289 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
290 		    instance));
291 		goto bad;
292 	}
293 
294 	/*
295 	 * Get the label of dimms and pin routing information at memory-layout
296 	 * property if the memory controller is enabled.
297 	 *
298 	 * Basically every memory-controller node on every machine should
299 	 * have one of these properties unless the memory controller is
300 	 * physically not capable of having memory attached to it, e.g.
301 	 * Excalibur's slave processor.
302 	 */
303 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
304 	    "memory-layout", (caddr_t)&dimminfop, &len);
305 	if (err == DDI_PROP_SUCCESS) {
306 		/*
307 		 * Set the pointer and size of property in the soft state
308 		 */
309 		softsp->memlayoutp = dimminfop;
310 		softsp->size = len;
311 	} else if (err == DDI_PROP_NOT_FOUND) {
312 		/*
313 		 * This is a disable MC. Clear out the pointer and size
314 		 * of property in the soft state
315 		 */
316 		softsp->memlayoutp = NULL;
317 		softsp->size = 0;
318 	} else {
319 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
320 		    instance, dimminfop));
321 		goto bad2;
322 	}
323 
324 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
325 	    instance, dimminfop, *(uint64_t *)dimminfop, len));
326 
327 	/* Get MC registers and construct all needed data structure */
328 	if (mc_get_mcregs(softsp) == -1)
329 		goto bad1;
330 
331 	mutex_enter(&mcmutex);
332 	if (nmcs == 1) {
333 		if (&p2get_mem_unum)
334 			p2get_mem_unum = mc_get_mem_unum;
335 		if (&p2get_mem_info)
336 			p2get_mem_info = mc_get_mem_info;
337 		if (&p2get_mem_sid)
338 			p2get_mem_sid = mc_get_mem_sid;
339 		if (&p2get_mem_offset)
340 			p2get_mem_offset = mc_get_mem_offset;
341 		if (&p2get_mem_addr)
342 			p2get_mem_addr = mc_get_mem_addr;
343 		if (&p2init_sid_cache)
344 			p2init_sid_cache = mc_init_sid_cache;
345 	}
346 
347 	mutex_exit(&mcmutex);
348 
349 	/*
350 	 * Update DIMM serial id information if the DIMM serial id
351 	 * cache has already been initialized.
352 	 */
353 	if (mc_dimm_sids) {
354 		rw_enter(&mcdimmsids_rw, RW_WRITER);
355 		(void) mc_populate_sid_cache();
356 		rw_exit(&mcdimmsids_rw);
357 	}
358 
359 	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
360 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
361 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
362 		    " failed \n"));
363 		goto bad1;
364 	}
365 
366 	ddi_report_dev(devi);
367 	return (DDI_SUCCESS);
368 
369 bad1:
370 	/* release all allocated data struture for this MC */
371 	mlayout_del(softsp->portid, 0);
372 	if (softsp->memlayoutp != NULL)
373 		kmem_free(softsp->memlayoutp, softsp->size);
374 
375 	/* remove the libdevinfo property */
376 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
377 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
378 	    MEM_CFG_PROP_NAME) == 1) {
379 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
380 			MEM_CFG_PROP_NAME);
381 	}
382 
383 bad2:
384 	/* unmap the registers for this device. */
385 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
386 
387 bad:
388 	ddi_soft_state_free(mcp, instance);
389 	return (DDI_FAILURE);
390 }
391 
392 /* ARGSUSED */
393 static int
394 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
395 {
396 	int instance;
397 	struct mc_soft_state *softsp;
398 
399 	/* get the instance of this devi */
400 	instance = ddi_get_instance(devi);
401 
402 	/* get the soft state pointer for this device node */
403 	softsp = ddi_get_soft_state(mcp, instance);
404 
405 	switch (cmd) {
406 	case DDI_SUSPEND:
407 		return (DDI_SUCCESS);
408 
409 	case DDI_DETACH:
410 		break;
411 
412 	default:
413 		return (DDI_FAILURE);
414 	}
415 
416 	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
417 	    instance, softsp->portid, softsp->memlayoutp));
418 
419 	/* remove the libdevinfo property */
420 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
421 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
422 	    MEM_CFG_PROP_NAME) == 1) {
423 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
424 			MEM_CFG_PROP_NAME);
425 	}
426 
427 	/* release all allocated data struture for this MC */
428 	mlayout_del(softsp->portid, 1);
429 	if (softsp->memlayoutp != NULL)
430 		kmem_free(softsp->memlayoutp, softsp->size);
431 
432 	/* unmap the registers */
433 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
434 
435 	mutex_enter(&mcmutex);
436 	if (nmcs == 0) {
437 		if (&p2get_mem_unum)
438 			p2get_mem_unum = NULL;
439 		if (&p2get_mem_info)
440 			p2get_mem_info = NULL;
441 		if (&p2get_mem_sid)
442 			p2get_mem_sid = NULL;
443 		if (&p2get_mem_offset)
444 			p2get_mem_offset = NULL;
445 		if (&p2get_mem_addr)
446 			p2get_mem_addr = NULL;
447 		if (&p2init_sid_cache)
448 			p2init_sid_cache = NULL;
449 	}
450 
451 	mutex_exit(&mcmutex);
452 
453 	ddi_remove_minor_node(devi, NULL);
454 
455 	/* free up the soft state */
456 	ddi_soft_state_free(mcp, instance);
457 
458 	return (DDI_SUCCESS);
459 }
460 
461 /* ARGSUSED */
462 static int
463 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
464 {
465 	int status = 0;
466 
467 	/* verify that otyp is appropriate */
468 	if (otyp != OTYP_CHR) {
469 		return (EINVAL);
470 	}
471 
472 	mutex_enter(&mcmutex);
473 	if (mc_is_open) {
474 		status = EBUSY;
475 		goto bad;
476 	}
477 	mc_is_open = 1;
478 bad:
479 	mutex_exit(&mcmutex);
480 	return (status);
481 }
482 
483 /* ARGSUSED */
484 static int
485 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
486 {
487 	mutex_enter(&mcmutex);
488 	mc_is_open = 0;
489 	mutex_exit(&mcmutex);
490 
491 	return (0);
492 }
493 
494 /*
495  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
496  * MCIOC_CTRLCONF, MCIOC_CONTROL.
497  *
498  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
499  * associated with various length struct. If given number is less than the
500  * number in kernel, update the number and return EINVAL so that user could
501  * allocate enough space for it.
502  *
503  */
504 
505 /* ARGSUSED */
506 static int
507 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
508 	int *rval_p)
509 {
510 	size_t	size;
511 	struct mc_memconf mcmconf;
512 	struct mc_memory *mcmem, mcmem_in;
513 	struct mc_segment *mcseg, mcseg_in;
514 	struct mc_bank mcbank;
515 	struct mc_devgrp mcdevgrp;
516 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
517 	struct mc_control *mccontrol, mccontrol_in;
518 	struct seg_info *seg = NULL;
519 	struct bank_info *bank = NULL;
520 	struct dgrp_info *dgrp = NULL;
521 	struct mctrl_info *mcport;
522 	mc_dlist_t *mctrl;
523 	int i, status = 0;
524 	cpu_t *cpu;
525 
526 	switch (cmd) {
527 	case MCIOC_MEMCONF:
528 		mutex_enter(&mcdatamutex);
529 
530 		mcmconf.nmcs = nmcs;
531 		mcmconf.nsegments = nsegments;
532 		mcmconf.nbanks = maxbanks;
533 		mcmconf.ndevgrps = NDGRPS;
534 		mcmconf.ndevs = NDIMMS;
535 		mcmconf.len_dev = MAX_DEVLEN;
536 		mcmconf.xfer_size = TRANSFER_SIZE;
537 
538 		mutex_exit(&mcdatamutex);
539 
540 		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
541 			return (EFAULT);
542 		return (0);
543 
544 	/*
545 	 * input: nsegments and allocate space for various length of segmentids
546 	 *
547 	 * return    0: size, number of segments, and all segment ids,
548 	 *		where glocal and local ids are identical.
549 	 *	EINVAL: if the given nsegments is less than that in kernel and
550 	 *		nsegments of struct will be updated.
551 	 *	EFAULT: if other errors in kernel.
552 	 */
553 	case MCIOC_MEM:
554 		if (copyin((void *)arg, &mcmem_in,
555 		    sizeof (struct mc_memory)) != 0)
556 			return (EFAULT);
557 
558 		mutex_enter(&mcdatamutex);
559 		if (mcmem_in.nsegments < nsegments) {
560 			mcmem_in.nsegments = nsegments;
561 			if (copyout(&mcmem_in, (void *)arg,
562 			    sizeof (struct mc_memory)))
563 				status = EFAULT;
564 			else
565 				status = EINVAL;
566 
567 			mutex_exit(&mcdatamutex);
568 			return (status);
569 		}
570 
571 		size = sizeof (struct mc_memory) + (nsegments - 1) *
572 		    sizeof (mcmem->segmentids[0]);
573 		mcmem = kmem_zalloc(size, KM_SLEEP);
574 
575 		mcmem->size = memsize;
576 		mcmem->nsegments = nsegments;
577 		seg = (struct seg_info *)seg_head;
578 		for (i = 0; i < nsegments; i++) {
579 			ASSERT(seg != NULL);
580 			mcmem->segmentids[i].globalid = seg->seg_node.id;
581 			mcmem->segmentids[i].localid = seg->seg_node.id;
582 			seg = (struct seg_info *)seg->seg_node.next;
583 		}
584 		mutex_exit(&mcdatamutex);
585 
586 		if (copyout(mcmem, (void *)arg, size))
587 			status = EFAULT;
588 
589 		kmem_free(mcmem, size);
590 		return (status);
591 
592 	/*
593 	 * input: id, nbanks and allocate space for various length of bankids
594 	 *
595 	 * return    0: base, size, number of banks, and all bank ids,
596 	 *		where global id is unique of all banks and local id
597 	 *		is only unique for mc.
598 	 *	EINVAL: either id isn't found or if given nbanks is less than
599 	 *		that in kernel and nbanks of struct will be updated.
600 	 *	EFAULT: if other errors in kernel.
601 	 */
602 	case MCIOC_SEG:
603 
604 		if (copyin((void *)arg, &mcseg_in,
605 		    sizeof (struct mc_segment)) != 0)
606 			return (EFAULT);
607 
608 		mutex_enter(&mcdatamutex);
609 		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
610 		    seg_head)) == NULL) {
611 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
612 			    "id %d\n", mcseg_in.id));
613 			mutex_exit(&mcdatamutex);
614 			return (EFAULT);
615 		}
616 
617 		if (mcseg_in.nbanks < seg->nbanks) {
618 			mcseg_in.nbanks = seg->nbanks;
619 			if (copyout(&mcseg_in, (void *)arg,
620 			    sizeof (struct mc_segment)))
621 				status = EFAULT;
622 			else
623 				status = EINVAL;
624 
625 			mutex_exit(&mcdatamutex);
626 			return (status);
627 		}
628 
629 		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
630 		    sizeof (mcseg->bankids[0]);
631 		mcseg = kmem_zalloc(size, KM_SLEEP);
632 
633 		mcseg->id = seg->seg_node.id;
634 		mcseg->ifactor = seg->ifactor;
635 		mcseg->base = seg->base;
636 		mcseg->size = seg->size;
637 		mcseg->nbanks = seg->nbanks;
638 
639 		bank = seg->hb_inseg;
640 
641 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
642 		    seg->nbanks, seg, bank));
643 
644 		i = 0;
645 		while (bank != NULL) {
646 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
647 			    i, bank->bank_node.id));
648 			mcseg->bankids[i].globalid = bank->bank_node.id;
649 			mcseg->bankids[i++].localid =
650 			    bank->local_id;
651 			bank = bank->n_inseg;
652 		}
653 		ASSERT(i == seg->nbanks);
654 		mutex_exit(&mcdatamutex);
655 
656 		if (copyout(mcseg, (void *)arg, size))
657 			status = EFAULT;
658 
659 		kmem_free(mcseg, size);
660 		return (status);
661 
662 	/*
663 	 * input: id
664 	 *
665 	 * return    0: mask, match, size, and devgrpid,
666 	 *		where global id is unique of all devgrps and local id
667 	 *		is only unique for mc.
668 	 *	EINVAL: if id isn't found
669 	 *	EFAULT: if other errors in kernel.
670 	 */
671 	case MCIOC_BANK:
672 		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
673 			return (EFAULT);
674 
675 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
676 
677 		mutex_enter(&mcdatamutex);
678 
679 		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
680 		    bank_head)) == NULL) {
681 			mutex_exit(&mcdatamutex);
682 			return (EINVAL);
683 		}
684 
685 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
686 		    bank->bank_node.id, bank, bank->valid));
687 
688 		/*
689 		 * If (Physic Address & MASK) == MATCH, Physic Address is
690 		 * located at this bank. The lower physical address bits
691 		 * are at [9-6].
692 		 */
693 		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
694 		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
695 		mcbank.match = bank->lm << MADR_LPA_SHIFT;
696 		mcbank.size = bank->size;
697 		mcbank.devgrpid.globalid = bank->devgrp_id;
698 		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;
699 
700 		mutex_exit(&mcdatamutex);
701 
702 		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
703 			return (EFAULT);
704 		return (0);
705 
706 	/*
707 	 * input:id and allocate space for various length of deviceids
708 	 *
709 	 * return    0: size and number of devices.
710 	 *	EINVAL: id isn't found
711 	 *	EFAULT: if other errors in kernel.
712 	 */
713 	case MCIOC_DEVGRP:
714 
715 		if (copyin((void *)arg, &mcdevgrp,
716 		    sizeof (struct mc_devgrp)) != 0)
717 			return (EFAULT);
718 
719 		mutex_enter(&mcdatamutex);
720 		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
721 		    dgrp_head)) == NULL) {
722 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
723 			    "%d\n", mcdevgrp.id));
724 			mutex_exit(&mcdatamutex);
725 			return (EINVAL);
726 		}
727 
728 		mcdevgrp.ndevices = dgrp->ndevices;
729 		mcdevgrp.size = dgrp->size;
730 
731 		mutex_exit(&mcdatamutex);
732 
733 		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
734 			status = EFAULT;
735 
736 		return (status);
737 
738 	/*
739 	 * input: nmcs and allocate space for various length of mcids
740 	 *
741 	 * return    0: number of mc, and all mcids,
742 	 *		where glocal and local ids are identical.
743 	 *	EINVAL: if the given nmcs is less than that in kernel and
744 	 *		nmcs of struct will be updated.
745 	 *	EFAULT: if other errors in kernel.
746 	 */
747 	case MCIOC_CTRLCONF:
748 		if (copyin((void *)arg, &mcctrlconf_in,
749 		    sizeof (struct mc_ctrlconf)) != 0)
750 			return (EFAULT);
751 
752 		mutex_enter(&mcdatamutex);
753 		if (mcctrlconf_in.nmcs < nmcs) {
754 			mcctrlconf_in.nmcs = nmcs;
755 			if (copyout(&mcctrlconf_in, (void *)arg,
756 			    sizeof (struct mc_ctrlconf)))
757 				status = EFAULT;
758 			else
759 				status = EINVAL;
760 
761 			mutex_exit(&mcdatamutex);
762 			return (status);
763 		}
764 
765 		/*
766 		 * Cannot just use the size of the struct because of the various
767 		 * length struct
768 		 */
769 		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
770 		    sizeof (mcctrlconf->mcids[0]));
771 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
772 
773 		mcctrlconf->nmcs = nmcs;
774 
775 		/* Get all MC ids and add to mcctrlconf */
776 		mctrl = mctrl_head;
777 		i = 0;
778 		while (mctrl != NULL) {
779 			mcctrlconf->mcids[i].globalid = mctrl->id;
780 			mcctrlconf->mcids[i].localid = mctrl->id;
781 			i++;
782 			mctrl = mctrl->next;
783 		}
784 		ASSERT(i == nmcs);
785 
786 		mutex_exit(&mcdatamutex);
787 
788 		if (copyout(mcctrlconf, (void *)arg, size))
789 			status = EFAULT;
790 
791 		kmem_free(mcctrlconf, size);
792 		return (status);
793 
794 	/*
795 	 * input:id, ndevgrps and allocate space for various length of devgrpids
796 	 *
797 	 * return    0: number of devgrp, and all devgrpids,
798 	 *		is unique of all devgrps and local id is only unique
799 	 *		for mc.
800 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
801 	 *		less than that in kernel and ndevgrps of struct will
802 	 *		be updated.
803 	 *	EFAULT: if other errors in kernel.
804 	 */
805 	case MCIOC_CONTROL:
806 		if (copyin((void *)arg, &mccontrol_in,
807 		    sizeof (struct mc_control)) != 0)
808 			return (EFAULT);
809 
810 		mutex_enter(&mcdatamutex);
811 		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
812 		    mctrl_head)) == NULL) {
813 			mutex_exit(&mcdatamutex);
814 			return (EINVAL);
815 		}
816 
817 		/*
818 		 * mcport->ndevgrps zero means Memory Controller is disable.
819 		 */
820 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
821 		    (mcport->ndevgrps == 0)) {
822 			mccontrol_in.ndevgrps = mcport->ndevgrps;
823 			if (copyout(&mccontrol_in, (void *)arg,
824 			    sizeof (struct mc_control)))
825 				status = EFAULT;
826 			else if (mcport->ndevgrps != 0)
827 				status = EINVAL;
828 
829 			mutex_exit(&mcdatamutex);
830 			return (status);
831 		}
832 
833 		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
834 		    sizeof (mccontrol->devgrpids[0]);
835 		mccontrol = kmem_zalloc(size, KM_SLEEP);
836 
837 		mccontrol->id = mcport->mctrl_node.id;
838 		mccontrol->ndevgrps = mcport->ndevgrps;
839 		for (i = 0; i < mcport->ndevgrps; i++) {
840 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
841 			mccontrol->devgrpids[i].localid =
842 			    mcport->devgrpids[i] % NDGRPS;
843 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
844 			    *(uint64_t *)&mccontrol->devgrpids[i]));
845 		}
846 		mutex_exit(&mcdatamutex);
847 
848 		if (copyout(mccontrol, (void *)arg, size))
849 			status = EFAULT;
850 
851 		kmem_free(mccontrol, size);
852 		return (status);
853 
854 	/*
855 	 * input:id
856 	 *
857 	 * return    0: CPU flushed successfully.
858 	 *	EINVAL: the id wasn't found
859 	 */
860 	case MCIOC_ECFLUSH:
861 		mutex_enter(&cpu_lock);
862 		cpu = cpu_get((processorid_t)arg);
863 		mutex_exit(&cpu_lock);
864 		if (cpu == NULL)
865 			return (EINVAL);
866 
867 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
868 
869 		return (0);
870 
871 	default:
872 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
873 		return (EFAULT);
874 	}
875 }
876 
877 /*
878  * Get Memory Address Decoding Registers and construct list.
879  * flag is to workaround Cheetah's restriction where register cannot be mapped
880  * if port id(MC registers on it) == cpu id(process is running on it).
881  */
882 static int
883 mc_get_mcregs(struct mc_soft_state *softsp)
884 {
885 	int i;
886 	int err = 0;
887 	uint64_t madreg;
888 	uint64_t ma_reg_array[NBANKS];	/* there are NBANKS of madrs */
889 
890 	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
891 	mc_construct(softsp->portid, softsp->memlayoutp);
892 
893 	/*
894 	 * If memlayoutp is NULL, the Memory Controller is disable, and
895 	 * doesn't need to create any bank and segment.
896 	 */
897 	if (softsp->memlayoutp == NULL)
898 		goto exit;
899 
900 	/*
901 	 * Get the content of 4 Memory Address Decoding Registers, and
902 	 * construct lists of logical banks and segments.
903 	 */
904 	for (i = 0; i < NBANKS; i++) {
905 		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
906 		    "cpu=%d\n", softsp->mc_base, softsp->portid, CPU->cpu_id));
907 
908 		kpreempt_disable();
909 		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
910 			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
911 		else
912 			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
913 			    (i * REGOFFSET)));
914 		kpreempt_enable();
915 
916 		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
917 		    "reg=0x%lx\n", softsp->memlayoutp, madreg));
918 
919 		ma_reg_array[i] = madreg;
920 
921 		if ((err = mlayout_add(softsp->portid, i, madreg,
922 		    softsp->memlayoutp)) == -1)
923 			break;
924 	}
925 
926 	/*
927 	 * Create the logical bank property for this mc node. This
928 	 * property is an encoded array of the madr for each logical
929 	 * bank (there are NBANKS of these).
930 	 */
931 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
932 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
933 	    MEM_CFG_PROP_NAME) != 1) {
934 		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
935 			DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
936 			(caddr_t)&ma_reg_array, sizeof (ma_reg_array));
937 	}
938 
939 exit:
940 	if (!err) {
941 		mutex_enter(&mcdatamutex);
942 		nmcs++;
943 		mutex_exit(&mcdatamutex);
944 	}
945 	return (err);
946 }
947 
948 /*
949  * Translate a <DIMM, offset> pair to a physical address.
950  */
951 static int
952 mc_offset_to_addr(struct seg_info *seg,
953     struct bank_info *bank, uint64_t off, uint64_t *addr)
954 {
955 	uint64_t base, size, line, remainder;
956 	uint32_t ifactor;
957 
958 	/*
959 	 * Compute the half-dimm size in bytes.
960 	 * Note that bank->size represents the number of data bytes,
961 	 * and does not include the additional bits used for ecc, mtag,
962 	 * and mtag ecc information in each 144-bit checkword.
963 	 * For calculating the offset to a checkword we need the size
964 	 * including the additional 8 bytes for each 64 data bytes of
965 	 * a cache line.
966 	 */
967 	size = ((bank->size / 4) / 64) * 72;
968 
969 	/*
970 	 * Check if the offset is within this bank. This depends on the position
971 	 * of the bank, i.e., whether it is the front bank or the back bank.
972 	 */
973 	base = size * bank->pos;
974 
975 	if ((off < base) || (off >= (base + size)))
976 		return (-1);
977 
978 	/*
979 	 * Compute the offset within the half-dimm.
980 	 */
981 	off -= base;
982 
983 	/*
984 	 * Compute the line within the half-dimm. This is the same as the line
985 	 * within the bank since each DIMM in a bank contributes uniformly
986 	 * 144 bits (18 bytes) to a cache line.
987 	 */
988 	line = off / QWORD_SIZE_BYTES;
989 
990 	remainder = off % QWORD_SIZE_BYTES;
991 
992 	/*
993 	 * Compute the line within the segment.
994 	 * The bank->lm field indicates the order in which cache lines are
995 	 * distributed across the banks of a segment (See the Cheetah PRM).
996 	 * The interleave factor the bank is programmed with is used instead
997 	 * of the segment interleave factor since a segment can be composed
998 	 * of banks with different interleave factors if the banks are not
999 	 * uniform in size.
1000 	 */
1001 	ifactor = (bank->lk ^ 0xF) + 1;
1002 	line = (line * ifactor) + bank->lm;
1003 
1004 	/*
1005 	 * Compute the physical address assuming that there are 64 data bytes
1006 	 * in a cache line.
1007 	 */
1008 	*addr = (line << 6) + seg->base;
1009 	*addr += remainder * 16;
1010 
1011 	return (0);
1012 }
1013 
1014 /*
1015  * Translate a physical address to a <DIMM, offset> pair.
1016  */
1017 static void
1018 mc_addr_to_offset(struct seg_info *seg,
1019     struct bank_info *bank, uint64_t addr, uint64_t *off)
1020 {
1021 	uint64_t base, size, line, remainder;
1022 	uint32_t ifactor;
1023 
1024 	/*
1025 	 * Compute the line within the segment assuming that there are 64 data
1026 	 * bytes in a cache line.
1027 	 */
1028 	line = (addr - seg->base) / 64;
1029 
1030 	/*
1031 	 * The lm (lower match) field from the Memory Address Decoding Register
1032 	 * for this bank determines which lines within a memory segment this
1033 	 * bank should respond to.  These are the actual address bits the
1034 	 * interleave is done over (See the Cheetah PRM).
1035 	 * In other words, the lm field indicates the order in which the cache
1036 	 * lines are distributed across the banks of a segment, and thusly it
1037 	 * can be used to compute the line within this bank. This is the same as
1038 	 * the line within the half-dimm. This is because each DIMM in a bank
1039 	 * contributes uniformly to every cache line.
1040 	 */
1041 	ifactor = (bank->lk ^ 0xF) + 1;
1042 	line = (line - bank->lm)/ifactor;
1043 
1044 	/*
1045 	 * Compute the offset within the half-dimm. This depends on whether
1046 	 * or not the bank is a front logical bank or a back logical bank.
1047 	 */
1048 	*off = line * QWORD_SIZE_BYTES;
1049 
1050 	/*
1051 	 * Compute the half-dimm size in bytes.
1052 	 * Note that bank->size represents the number of data bytes,
1053 	 * and does not include the additional bits used for ecc, mtag,
1054 	 * and mtag ecc information in each 144-bit quadword.
1055 	 * For calculating the offset to a checkword we need the size
1056 	 * including the additional 8 bytes for each 64 data bytes of
1057 	 * a cache line.
1058 	 */
1059 	size = ((bank->size / 4) / 64) * 72;
1060 
1061 	/*
1062 	 * Compute the offset within the dimm to the nearest line. This depends
1063 	 * on whether or not the bank is a front logical bank or a back logical
1064 	 * bank.
1065 	 */
1066 	base = size * bank->pos;
1067 	*off += base;
1068 
1069 	remainder = (addr - seg->base) % 64;
1070 	remainder /= 16;
1071 	*off += remainder;
1072 }
1073 
/*
 * A cache line is composed of four quadwords with the associated ECC, the
 * MTag along with its associated ECC. This is depicted below:
 *
 * |                    Data                    |   ECC   | Mtag |MTag ECC|
 *  127                                         0 8       0 2    0 3      0
 *
 * synd_code will be mapped as the following order to mc_get_mem_unum.
 *  143                                         16        7      4        0
 *
 * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
 *  575         432 431         288 287         144 143		   0
 *
 * dimm table: each bit of a cache line needs two bits to select one of
 *      four dimms, so the table takes 144 bytes (576 * 2 / 8). The content
 *      is in big endian order, i.e. dimm_table[0] covers bits 572 to 575.
 *
 * pin table: each bit of a cache line needs one byte to hold its pin
 *      position, where the maximum is 230, so the table takes 576 bytes.
 *      The table index is the bit position within the cache line, i.e.
 *      pin_table[0] is for bit 0, Mtag ECC 0 of Quadword 3.
 *
 * qwordmap maps a syndrome code to the QuadWord logical layout at Safari.
 * Referring to Figure 3-4, Excalibur Architecture Manual.
 * This table could be moved to cheetah.c if other platform teams agree with
 * the bit layout at QuadWord.
 */

static uint8_t qwordmap[] =
{
	/* syndrome codes 0..127 -> quadword bits 16..143 */
	 16,  17,  18,  19,  20,  21,  22,  23,
	 24,  25,  26,  27,  28,  29,  30,  31,
	 32,  33,  34,  35,  36,  37,  38,  39,
	 40,  41,  42,  43,  44,  45,  46,  47,
	 48,  49,  50,  51,  52,  53,  54,  55,
	 56,  57,  58,  59,  60,  61,  62,  63,
	 64,  65,  66,  67,  68,  69,  70,  71,
	 72,  73,  74,  75,  76,  77,  78,  79,
	 80,  81,  82,  83,  84,  85,  86,  87,
	 88,  89,  90,  91,  92,  93,  94,  95,
	 96,  97,  98,  99, 100, 101, 102, 103,
	104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	/* syndrome codes 128..136 -> quadword bits 7..15 */
	  7,   8,   9,  10,  11,  12,  13,  14,  15,
	/* syndrome codes 137..139 -> quadword bits 4..6 */
	  4,   5,   6,
	/* syndrome codes 140..143 -> quadword bits 0..3 */
	  0,   1,   2,   3,
};
1114 
1115 
1116 /* ARGSUSED */
1117 static int
1118 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1119 {
1120 	int i, upper_pa, lower_pa, dimmoffset;
1121 	int quadword, pos_cacheline, position, index, idx4dimm;
1122 	int qwlayout = synd_code;
1123 	short offset, data;
1124 	char unum[UNUM_NAMLEN];
1125 	struct dimm_info *dimmp;
1126 	struct pin_info *pinp;
1127 	struct bank_info *bank;
1128 
1129 	/*
1130 	 * Enforce old Openboot requirement for synd code, either a single-bit
1131 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1132 	 */
1133 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1134 		return (EINVAL);
1135 
1136 	unum[0] = '\0';
1137 
1138 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1139 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1140 
1141 	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));
1142 
1143 	/*
1144 	 * Scan all logical banks to get one responding to the physical
1145 	 * address. Then compute the index to look up dimm and pin tables
1146 	 * to generate the unum.
1147 	 */
1148 	mutex_enter(&mcdatamutex);
1149 	bank = (struct bank_info *)bank_head;
1150 	while (bank != NULL) {
1151 		int bankid, mcid, bankno_permc;
1152 
1153 		bankid = bank->bank_node.id;
1154 		bankno_permc = bankid % NBANKS;
1155 		mcid = bankid / NBANKS;
1156 
1157 		/*
1158 		 * The Address Decoding logic decodes the different fields
1159 		 * in the Memory Address Decoding register to determine
1160 		 * whether a particular logical bank should respond to a
1161 		 * physical address.
1162 		 */
1163 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1164 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1165 			bank = (struct bank_info *)bank->bank_node.next;
1166 			continue;
1167 		}
1168 
1169 		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;
1170 
1171 		dimmp = (struct dimm_info *)bank->dimminfop;
1172 		ASSERT(dimmp != NULL);
1173 
1174 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1175 			/*
1176 			 * single-bit error handling, we can identify specific
1177 			 * DIMM.
1178 			 */
1179 
1180 			pinp = (struct pin_info *)&dimmp->data[0];
1181 
1182 			if (!dimmp->sym_flag)
1183 				pinp++;
1184 
1185 			quadword = (paddr & 0x3f) / 16;
1186 			/* or quadword = (paddr >> 4) % 4; */
1187 			pos_cacheline = ((3 - quadword) * QWORD_SIZE) +
1188 			    qwordmap[qwlayout];
1189 			position = 575 - pos_cacheline;
1190 			index = position * 2 / 8;
1191 			offset = position % 4;
1192 
1193 			/*
1194 			 * Trade-off: We couldn't add pin number to
1195 			 * unum string because statistic number
1196 			 * pumps up at the corresponding dimm not pin.
1197 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1198 			 * pinp->pintable[pos_cacheline]);
1199 			 */
1200 			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
1201 			    (uint_t)pinp->pintable[pos_cacheline]));
1202 			data = pinp->dimmtable[index];
1203 			idx4dimm = (data >> ((3 - offset) * 2)) & 3;
1204 
1205 			(void) strncpy(unum,
1206 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1207 			    UNUM_NAMLEN);
1208 			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
1209 			/*
1210 			 * platform hook for adding label information to unum.
1211 			 */
1212 			mc_add_mem_unum_label(unum, mcid, bankno_permc,
1213 			    idx4dimm);
1214 		} else {
1215 			char *p = unum;
1216 			size_t res = UNUM_NAMLEN;
1217 
1218 			/*
1219 			 * multi-bit error handling, we can only identify
1220 			 * bank of DIMMs.
1221 			 */
1222 
1223 			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
1224 				(void) snprintf(p, res, "%s%s",
1225 				    i == 0 ? "" : " ",
1226 				    (char *)dimmp->label[dimmoffset + i]);
1227 				res -= strlen(p);
1228 				p += strlen(p);
1229 			}
1230 
1231 			/*
1232 			 * platform hook for adding label information
1233 			 * to unum.
1234 			 */
1235 			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
1236 		}
1237 		mutex_exit(&mcdatamutex);
1238 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1239 		    (strlen(unum) >= buflen)) {
1240 			return (ENOSPC);
1241 		} else {
1242 			(void) strncpy(buf, unum, buflen);
1243 			*lenp = strlen(buf);
1244 			return (0);
1245 		}
1246 	}	/* end of while loop for logical bank list */
1247 
1248 	mutex_exit(&mcdatamutex);
1249 	return (ENXIO);
1250 }
1251 
1252 /* ARGSUSED */
1253 static int
1254 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
1255 {
1256 	int upper_pa, lower_pa;
1257 	struct bank_info *bank;
1258 	struct seg_info *seg;
1259 
1260 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1261 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1262 
1263 	/*
1264 	 * Scan all logical banks to get one responding to the physical
1265 	 * address.
1266 	 */
1267 	mutex_enter(&mcdatamutex);
1268 	bank = (struct bank_info *)bank_head;
1269 	while (bank != NULL) {
1270 		/*
1271 		 * The Address Decoding logic decodes the different fields
1272 		 * in the Memory Address Decoding register to determine
1273 		 * whether a particular logical bank should respond to a
1274 		 * physical address.
1275 		 */
1276 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1277 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1278 			bank = (struct bank_info *)bank->bank_node.next;
1279 			continue;
1280 		}
1281 
1282 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1283 		ASSERT(seg != NULL);
1284 		ASSERT(paddr >= seg->base);
1285 
1286 		mc_addr_to_offset(seg, bank, paddr, offp);
1287 
1288 		mutex_exit(&mcdatamutex);
1289 		return (0);
1290 	}
1291 
1292 	mutex_exit(&mcdatamutex);
1293 	return (ENXIO);
1294 }
1295 
1296 /*
1297  * Translate a DIMM <id, offset> pair to a physical address.
1298  */
1299 static int
1300 mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr)
1301 {
1302 	struct seg_info *seg;
1303 	struct bank_info *bank;
1304 	int first_seg_id;
1305 	int i, found;
1306 
1307 	ASSERT(sid != NULL);
1308 
1309 	mutex_enter(&mcdatamutex);
1310 
1311 	rw_enter(&mcdimmsids_rw, RW_READER);
1312 
1313 	/*
1314 	 * If DIMM serial ids have not been cached yet, tell the
1315 	 * caller to try again.
1316 	 */
1317 	if (mc_dimm_sids == NULL) {
1318 		rw_exit(&mcdimmsids_rw);
1319 		return (EAGAIN);
1320 	}
1321 
1322 	for (i = 0; i < max_entries; i++) {
1323 		if (mc_dimm_sids[i].mcid == mcid)
1324 			break;
1325 	}
1326 
1327 	if (i == max_entries) {
1328 		rw_exit(&mcdimmsids_rw);
1329 		mutex_exit(&mcdatamutex);
1330 		return (ENODEV);
1331 	}
1332 
1333 	first_seg_id = mc_dimm_sids[i].seg_id;
1334 
1335 	seg = (struct seg_info *)mc_node_get(first_seg_id, seg_head);
1336 
1337 	rw_exit(&mcdimmsids_rw);
1338 
1339 	if (seg == NULL) {
1340 		mutex_exit(&mcdatamutex);
1341 		return (ENODEV);
1342 	}
1343 
1344 	found = 0;
1345 
1346 	for (bank = seg->hb_inseg; bank; bank = bank->n_inseg) {
1347 		ASSERT(bank->valid);
1348 
1349 		for (i = 0; i < NDIMMS; i++) {
1350 			if (strncmp((char *)bank->dimmsidp[i], sid,
1351 			    DIMM_SERIAL_ID_LEN)  == 0)
1352 				break;
1353 		}
1354 
1355 		if (i == NDIMMS)
1356 			continue;
1357 
1358 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1)
1359 			continue;
1360 		found = 1;
1361 		break;
1362 	}
1363 
1364 	if (found) {
1365 		mutex_exit(&mcdatamutex);
1366 		return (0);
1367 	}
1368 
1369 	/*
1370 	 * If a bank wasn't found, it may be in another segment.
1371 	 * This can happen if the different logical banks of an MC
1372 	 * have different interleave factors.  To deal with this
1373 	 * possibility, we'll do a brute-force search for banks
1374 	 * for this MC with a different seg id then above.
1375 	 */
1376 	bank = (struct bank_info *)bank_head;
1377 	while (bank != NULL) {
1378 
1379 		if (!bank->valid) {
1380 			bank = (struct bank_info *)bank->bank_node.next;
1381 			continue;
1382 		}
1383 
1384 		if (bank->bank_node.id / NBANKS != mcid) {
1385 			bank = (struct bank_info *)bank->bank_node.next;
1386 			continue;
1387 		}
1388 
1389 		/* Ignore banks in the segment we looked in above. */
1390 		if (bank->seg_id == mc_dimm_sids[i].seg_id) {
1391 			bank = (struct bank_info *)bank->bank_node.next;
1392 			continue;
1393 		}
1394 
1395 		for (i = 0; i < NDIMMS; i++) {
1396 			if (strncmp((char *)bank->dimmsidp[i], sid,
1397 			    DIMM_SERIAL_ID_LEN)  == 0)
1398 				break;
1399 		}
1400 
1401 		if (i == NDIMMS) {
1402 			bank = (struct bank_info *)bank->bank_node.next;
1403 			continue;
1404 		}
1405 
1406 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1407 
1408 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1) {
1409 			bank = (struct bank_info *)bank->bank_node.next;
1410 			continue;
1411 		}
1412 
1413 		found = 1;
1414 		break;
1415 	}
1416 
1417 	mutex_exit(&mcdatamutex);
1418 
1419 	if (found)
1420 		return (0);
1421 	else
1422 		return (ENOENT);
1423 }
1424 
1425 static int
1426 mc_get_mem_info(int synd_code, uint64_t paddr,
1427     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1428     int *segsp, int *banksp, int *mcidp)
1429 {
1430 	int upper_pa, lower_pa;
1431 	struct bank_info *bankp;
1432 
1433 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1434 		return (EINVAL);
1435 
1436 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1437 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1438 
1439 	/*
1440 	 * Scan all logical banks to get one responding to the physical
1441 	 * address.
1442 	 */
1443 	mutex_enter(&mcdatamutex);
1444 	bankp = (struct bank_info *)bank_head;
1445 	while (bankp != NULL) {
1446 		struct seg_info *segp;
1447 		int bankid, mcid;
1448 
1449 		bankid = bankp->bank_node.id;
1450 		mcid = bankid / NBANKS;
1451 
1452 		/*
1453 		 * The Address Decoding logic decodes the different fields
1454 		 * in the Memory Address Decoding register to determine
1455 		 * whether a particular logical bank should respond to a
1456 		 * physical address.
1457 		 */
1458 		if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) |
1459 		    bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) {
1460 			bankp = (struct bank_info *)bankp->bank_node.next;
1461 			continue;
1462 		}
1463 
1464 		/*
1465 		 * Get the corresponding segment.
1466 		 */
1467 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1468 		    seg_head)) == NULL) {
1469 			mutex_exit(&mcdatamutex);
1470 			return (EFAULT);
1471 		}
1472 
1473 		*mem_sizep = memsize;
1474 		*seg_sizep = segp->size;
1475 		*bank_sizep = bankp->size;
1476 		*segsp = nsegments;
1477 		*banksp = segp->nbanks;
1478 		*mcidp = mcid;
1479 
1480 		mutex_exit(&mcdatamutex);
1481 
1482 		return (0);
1483 
1484 	}	/* end of while loop for logical bank list */
1485 
1486 	mutex_exit(&mcdatamutex);
1487 	return (ENXIO);
1488 }
1489 
1490 /*
1491  * Construct lists for an enabled MC where size of memory is 0.
1492  * The lists are connected as follows:
1493  * Attached MC -> device group list -> device list(per devgrp).
1494  */
1495 static void
1496 mc_construct(int mc_id, void *dimminfop)
1497 {
1498 	int i, j, idx, dmidx;
1499 	struct mctrl_info *mctrl;
1500 	struct dgrp_info *dgrp;
1501 	struct device_info *dev;
1502 	struct	dimm_info *dimmp = (struct  dimm_info *)dimminfop;
1503 
1504 	mutex_enter(&mcdatamutex);
1505 	/* allocate for mctrl_info and bank_info */
1506 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id,
1507 	    mctrl_head)) != NULL) {
1508 		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
1509 		mutex_exit(&mcdatamutex);
1510 		return;
1511 	}
1512 
1513 	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
1514 
1515 	/*
1516 	 * If dimminfop is NULL, the Memory Controller is disable, and
1517 	 * the number of device group will be zero.
1518 	 */
1519 	if (dimminfop == NULL) {
1520 		mctrl->mctrl_node.id = mc_id;
1521 		mctrl->ndevgrps = 0;
1522 		mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1523 		mutex_exit(&mcdatamutex);
1524 		return;
1525 	}
1526 
1527 	/* add the entry on dgrp_info list */
1528 	for (i = 0; i < NDGRPS; i++) {
1529 		idx = mc_id * NDGRPS + i;
1530 		mctrl->devgrpids[i] = idx;
1531 		if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head))
1532 		    != NULL) {
1533 			cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n",
1534 			    idx);
1535 			continue;
1536 		}
1537 
1538 		dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP);
1539 
1540 		/* add the entry on device_info list */
1541 		for (j = 0; j < NDIMMS; j++) {
1542 			dmidx = idx * NDIMMS + j;
1543 			dgrp->deviceids[j] = dmidx;
1544 			if ((dev = (struct device_info *)
1545 			    mc_node_get(dmidx, device_head)) != NULL) {
1546 				cmn_err(CE_WARN, "mc_construct: device %d "
1547 				    "exists\n", dmidx);
1548 				continue;
1549 			}
1550 			dev = kmem_zalloc(sizeof (struct device_info),
1551 			    KM_SLEEP);
1552 			dev->dev_node.id = dmidx;
1553 			dev->size = 0;
1554 			(void) strncpy(dev->label, (char *)
1555 			    dimmp->label[i * NDIMMS + j], MAX_DEVLEN);
1556 
1557 			mc_node_add((mc_dlist_t *)dev, &device_head,
1558 			    &device_tail);
1559 		}	/* for loop for constructing device_info */
1560 
1561 		dgrp->dgrp_node.id = idx;
1562 		dgrp->ndevices = NDIMMS;
1563 		dgrp->size = 0;
1564 		mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1565 
1566 	}	/* end of for loop for constructing dgrp_info list */
1567 
1568 	mctrl->mctrl_node.id = mc_id;
1569 	mctrl->ndevgrps = NDGRPS;
1570 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1571 	mutex_exit(&mcdatamutex);
1572 }
1573 
1574 /*
1575  * Construct lists for Memory Configuration at logical viewpoint.
1576  *
1577  * Retrieve information from Memory Address Decoding Register and set up
1578  * bank and segment lists. Link bank to its corresponding device group, and
1579  * update size of device group and devices. Also connect bank to the segment.
1580  *
1581  * Memory Address Decoding Register
1582  * -------------------------------------------------------------------------
1583  * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
1584  * |-----------|----------|------|---------|-----|------|-----|-----|-------|
1585  * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
1586  * -------------------------------------------------------------------------
1587  *
1588  */
1589 
1590 static int
1591 mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
1592 {
1593 	int i, dmidx, idx;
1594 	uint32_t ifactor;
1595 	int status = 0;
1596 	uint64_t size, base;
1597 	struct seg_info *seg_curr;
1598 	struct bank_info *bank_curr;
1599 	struct dgrp_info *dgrp;
1600 	struct device_info *dev;
1601 	union {
1602 		struct {
1603 			uint64_t valid	: 1;
1604 			uint64_t resrv1	: 10;
1605 			uint64_t uk	: 12;
1606 			uint64_t resrv2	: 4;
1607 			uint64_t um	: 17;
1608 			uint64_t resrv3	: 2;
1609 			uint64_t lk	: 4;
1610 			uint64_t resrv4	: 2;
1611 			uint64_t lm	: 4;
1612 			uint64_t resrv5	: 8;
1613 		} _s;
1614 		uint64_t madreg;
1615 	} mcreg;
1616 
1617 	mcreg.madreg = reg;
1618 
1619 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
1620 	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));
1621 
1622 	/* add the entry on bank_info list */
1623 	idx = mc_id * NBANKS + bank_no;
1624 
1625 	mutex_enter(&mcdatamutex);
1626 	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
1627 	    != NULL) {
1628 		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
1629 		goto exit;
1630 	}
1631 
1632 	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
1633 	bank_curr->bank_node.id = idx;
1634 	bank_curr->valid = mcreg._s.valid;
1635 	bank_curr->dimminfop = dimminfop;
1636 
1637 	if (!mcreg._s.valid) {
1638 		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1639 		goto exit;
1640 	}
1641 
1642 	/*
1643 	 * size of a logical bank = size of segment / interleave factor
1644 	 * This fomula is not only working for regular configuration,
1645 	 * i.e. number of banks at a segment equals to the max
1646 	 * interleave factor, but also for special case, say 3 bank
1647 	 * interleave. One bank is 2 way interleave and other two are
1648 	 * 4 way. So the sizes of banks are size of segment/2 and /4
1649 	 * respectively.
1650 	 */
1651 	ifactor = (mcreg._s.lk ^ 0xF) + 1;
1652 	size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
1653 	base = mcreg._s.um & ~mcreg._s.uk;
1654 	base <<= MADR_UPA_SHIFT;
1655 
1656 	bank_curr->uk = mcreg._s.uk;
1657 	bank_curr->um = mcreg._s.um;
1658 	bank_curr->lk = mcreg._s.lk;
1659 	bank_curr->lm = mcreg._s.lm;
1660 	bank_curr->size = size;
1661 
1662 	/*
1663 	 * The bank's position depends on which halves of the DIMMs it consists
1664 	 * of. The front-side halves of the 4 DIMMs constitute the front bank
1665 	 * and the back-side halves constitute the back bank. Bank numbers
1666 	 * 0 and 1 are front-side banks and bank numbers 2 and 3 are back side
1667 	 * banks.
1668 	 */
1669 	bank_curr->pos = bank_no >> 1;
1670 	ASSERT((bank_curr->pos == 0) || (bank_curr->pos == 1));
1671 
1672 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1673 	"lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
1674 	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1675 
1676 	/* connect the entry and update the size on dgrp_info list */
1677 	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
1678 	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
1679 		/* all avaiable dgrp should be linked at mc_construct */
1680 		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
1681 		kmem_free(bank_curr, sizeof (struct bank_info));
1682 		status = -1;
1683 		goto exit;
1684 	}
1685 
1686 	bank_curr->devgrp_id = idx;
1687 	dgrp->size += size;
1688 
1689 	/* Update the size of entry on device_info list */
1690 	for (i = 0; i < NDIMMS; i++) {
1691 		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
1692 		dgrp->deviceids[i] = dmidx;
1693 
1694 		/* avaiable device should be linked at mc_construct */
1695 		if ((dev = (struct device_info *)mc_node_get(dmidx,
1696 		    device_head)) == NULL) {
1697 			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
1698 			    dmidx);
1699 			kmem_free(bank_curr, sizeof (struct bank_info));
1700 			status = -1;
1701 			goto exit;
1702 		}
1703 
1704 		dev->size += (size / NDIMMS);
1705 
1706 		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
1707 		    dmidx, size));
1708 	}
1709 
1710 	/*
1711 	 * Get the segment by matching the base address, link this bank
1712 	 * to the segment. If not matched, allocate a new segment and
1713 	 * add it at segment list.
1714 	 */
1715 	if (seg_curr = seg_match_base(base)) {
1716 		seg_curr->nbanks++;
1717 		seg_curr->size += size;
1718 		if (ifactor > seg_curr->ifactor)
1719 			seg_curr->ifactor = ifactor;
1720 		bank_curr->seg_id = seg_curr->seg_node.id;
1721 	} else {
1722 		seg_curr = (struct seg_info *)
1723 		kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
1724 		bank_curr->seg_id = seg_id;
1725 		seg_curr->seg_node.id = seg_id++;
1726 		seg_curr->base = base;
1727 		seg_curr->size = size;
1728 		seg_curr->nbanks = 1;
1729 		seg_curr->ifactor = ifactor;
1730 		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);
1731 
1732 		nsegments++;
1733 	}
1734 
1735 	/* Get the local id of bank which is only unique per segment. */
1736 	bank_curr->local_id = seg_curr->nbanks - 1;
1737 
1738 	/* add bank at the end of the list; not sorted by bankid */
1739 	if (seg_curr->hb_inseg != NULL) {
1740 		bank_curr->p_inseg = seg_curr->tb_inseg;
1741 		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
1742 		seg_curr->tb_inseg->n_inseg = bank_curr;
1743 		seg_curr->tb_inseg = bank_curr;
1744 	} else {
1745 		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
1746 		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
1747 	}
1748 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
1749 	    seg_curr->seg_node.id));
1750 
1751 	if (mc_dimm_sids) {
1752 		rw_enter(&mcdimmsids_rw, RW_WRITER);
1753 		mc_update_bank(bank_curr);
1754 		rw_exit(&mcdimmsids_rw);
1755 	}
1756 	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1757 
1758 	memsize += size;
1759 	if (seg_curr->nbanks > maxbanks)
1760 		maxbanks = seg_curr->nbanks;
1761 
1762 exit:
1763 	mutex_exit(&mcdatamutex);
1764 	return (status);
1765 }
1766 
1767 /*
1768  * Delete nodes related to the given MC on mc, device group, device,
1769  * and bank lists. Moreover, delete corresponding segment if its connected
1770  * banks are all removed.
1771  *
1772  * The "delete" argument is 1 if this is called as a result of DDI_DETACH. In
1773  * this case, the DIMM data structures need to be deleted. The argument is
1774  * 0 if this called as a result of DDI_SUSPEND/DDI_RESUME. In this case,
1775  * the DIMM data structures are left alone.
1776  */
1777 static void
1778 mlayout_del(int mc_id, int delete)
1779 {
1780 	int i, j, dgrpid, devid, bankid, ndevgrps;
1781 	struct seg_info *seg;
1782 	struct bank_info *bank_curr;
1783 	struct mctrl_info *mctrl;
1784 	mc_dlist_t *dgrp_ptr;
1785 	mc_dlist_t *dev_ptr;
1786 	uint64_t base;
1787 
1788 	mutex_enter(&mcdatamutex);
1789 
1790 	/* delete mctrl_info */
1791 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
1792 	    NULL) {
1793 		ndevgrps = mctrl->ndevgrps;
1794 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1795 		kmem_free(mctrl, sizeof (struct mctrl_info));
1796 		nmcs--;
1797 
1798 		/*
1799 		 * There is no other list left for disabled MC.
1800 		 */
1801 		if (ndevgrps == 0) {
1802 			mutex_exit(&mcdatamutex);
1803 			return;
1804 		}
1805 	} else
1806 		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");
1807 
1808 	/* Delete device groups and devices of the detached MC */
1809 	for (i = 0; i < NDGRPS; i++) {
1810 		dgrpid = mc_id * NDGRPS + i;
1811 		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
1812 			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
1813 			continue;
1814 		}
1815 
1816 		for (j = 0; j < NDIMMS; j++) {
1817 			devid = dgrpid * NDIMMS + j;
1818 			if (dev_ptr = mc_node_get(devid, device_head)) {
1819 				mc_node_del(dev_ptr, &device_head,
1820 				    &device_tail);
1821 				kmem_free(dev_ptr, sizeof (struct device_info));
1822 			} else {
1823 				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
1824 				    devid);
1825 			}
1826 		}
1827 
1828 		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
1829 		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
1830 	}
1831 
1832 	/* Delete banks and segments if it has no bank */
1833 	for (i = 0; i < NBANKS; i++) {
1834 		bankid = mc_id * NBANKS + i;
1835 		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
1836 		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
1837 		    bank_head))) {
1838 			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
1839 			continue;
1840 		}
1841 
1842 		if (bank_curr->valid) {
1843 			base = bank_curr->um & ~bank_curr->uk;
1844 			base <<= MADR_UPA_SHIFT;
1845 			bank_curr->valid = 0;
1846 			memsize -= bank_curr->size;
1847 
1848 			/* Delete bank at segment and segment if no bank left */
1849 			if (!(seg = seg_match_base(base))) {
1850 				cmn_err(CE_WARN, "mlayout_del: no seg\n");
1851 				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
1852 				    &bank_tail);
1853 				kmem_free(bank_curr, sizeof (struct bank_info));
1854 				continue;
1855 			}
1856 
1857 			/* update the bank list at the segment */
1858 			if (bank_curr->n_inseg == NULL) {
1859 				/* node is at the tail of list */
1860 				seg->tb_inseg = bank_curr->p_inseg;
1861 			} else {
1862 				bank_curr->n_inseg->p_inseg =
1863 				    bank_curr->p_inseg;
1864 			}
1865 
1866 			if (bank_curr->p_inseg == NULL) {
1867 				/* node is at the head of list */
1868 				seg->hb_inseg = bank_curr->n_inseg;
1869 			} else {
1870 				bank_curr->p_inseg->n_inseg =
1871 				    bank_curr->n_inseg;
1872 			}
1873 
1874 			seg->nbanks--;
1875 			seg->size -= bank_curr->size;
1876 
1877 			if (seg->nbanks == 0) {
1878 				mc_node_del((mc_dlist_t *)seg, &seg_head,
1879 				    &seg_tail);
1880 				kmem_free(seg, sizeof (struct seg_info));
1881 				nsegments--;
1882 			}
1883 
1884 		}
1885 		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1886 		kmem_free(bank_curr, sizeof (struct bank_info));
1887 	}	/* end of for loop for four banks */
1888 
1889 	if (mc_dimm_sids && delete) {
1890 		rw_enter(&mcdimmsids_rw, RW_WRITER);
1891 		i = mc_get_sid_cache_index(mc_id);
1892 		if (i >= 0) {
1893 			mc_dimm_sids[i].state = MC_DIMM_SIDS_INVALID;
1894 			if (mc_dimm_sids[i].sids) {
1895 				kmem_free(mc_dimm_sids[i].sids,
1896 				    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS));
1897 				mc_dimm_sids[i].sids = NULL;
1898 			}
1899 		}
1900 		rw_exit(&mcdimmsids_rw);
1901 	}
1902 
1903 	mutex_exit(&mcdatamutex);
1904 }
1905 
1906 /*
1907  * Search the segment in the list starting at seg_head by base address
1908  * input: base address
1909  * return: pointer of found segment or null if not found.
1910  */
1911 static struct seg_info *
1912 seg_match_base(u_longlong_t base)
1913 {
1914 	static struct seg_info *seg_ptr;
1915 
1916 	seg_ptr = (struct seg_info *)seg_head;
1917 	while (seg_ptr != NULL) {
1918 		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
1919 		    seg_ptr->base, base));
1920 		if (seg_ptr->base == base)
1921 			break;
1922 		seg_ptr = (struct seg_info *)seg_ptr->seg_node.next;
1923 	}
1924 	return (seg_ptr);
1925 }
1926 
1927 /*
1928  * mc_dlist is a double linking list, including unique id, and pointers to
1929  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1930  * and mctrl_info has it at the top to share the operations, add, del, and get.
1931  *
1932  * The new node is added at the tail and is not sorted.
1933  *
1934  * Input: The pointer of node to be added, head and tail of the list
1935  */
1936 
1937 static void
1938 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1939 {
1940 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1941 		node->id, *head, *tail));
1942 
1943 	if (*head != NULL) {
1944 		node->prev = *tail;
1945 		node->next = (*tail)->next;
1946 		(*tail)->next = node;
1947 		*tail = node;
1948 	} else {
1949 		node->next = node->prev = NULL;
1950 		*head = *tail = node;
1951 	}
1952 }
1953 
1954 /*
1955  * Input: The pointer of node to be deleted, head and tail of the list
1956  *
1957  * Deleted node will be at the following positions
1958  * 1. At the tail of the list
1959  * 2. At the head of the list
1960  * 3. At the head and tail of the list, i.e. only one left.
1961  * 4. At the middle of the list
1962  */
1963 
1964 static void
1965 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1966 {
1967 	if (node->next == NULL) {
1968 		/* deleted node is at the tail of list */
1969 		*tail = node->prev;
1970 	} else {
1971 		node->next->prev = node->prev;
1972 	}
1973 
1974 	if (node->prev == NULL) {
1975 		/* deleted node is at the head of list */
1976 		*head = node->next;
1977 	} else {
1978 		node->prev->next = node->next;
1979 	}
1980 }
1981 
1982 /*
1983  * Search the list from the head of the list to match the given id
1984  * Input: id and the head of the list
1985  * Return: pointer of found node
1986  */
1987 static mc_dlist_t *
1988 mc_node_get(int id, mc_dlist_t *head)
1989 {
1990 	mc_dlist_t *node;
1991 
1992 	node = head;
1993 	while (node != NULL) {
1994 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1995 		    node->id, id));
1996 		if (node->id == id)
1997 			break;
1998 		node = node->next;
1999 	}
2000 	return (node);
2001 }
2002 
2003 /*
2004  * mc-us3 driver allows a platform to add extra label
2005  * information to the unum string. If a platform implements a
2006  * kernel function called plat_add_mem_unum_label() it will be
2007  * executed. This would typically be implemented in the platmod.
2008  */
2009 static void
2010 mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
2011 {
2012 	if (&plat_add_mem_unum_label)
2013 		plat_add_mem_unum_label(buf, mcid, bank, dimm);
2014 }
2015 
2016 static int
2017 mc_get_sid_cache_index(int mcid)
2018 {
2019 	int	i;
2020 
2021 	for (i = 0; i < max_entries; i++) {
2022 		if (mcid == mc_dimm_sids[i].mcid)
2023 			return (i);
2024 	}
2025 
2026 	return (-1);
2027 }
2028 
2029 static void
2030 mc_update_bank(struct bank_info *bank)
2031 {
2032 	int i, j;
2033 	int bankid, mcid, dgrp_no;
2034 
2035 	/*
2036 	 * Mark the MC if DIMM sids are not available.
2037 	 * Mark which segment the DIMMs belong to.  Allocate
2038 	 * space to store DIMM serial ids which are later
2039 	 * provided by the platform layer, and update the bank_info
2040 	 * structure with pointers to its serial ids.
2041 	 */
2042 	bankid = bank->bank_node.id;
2043 	mcid = bankid / NBANKS;
2044 	i = mc_get_sid_cache_index(mcid);
2045 	if (mc_dimm_sids[i].state == MC_DIMM_SIDS_INVALID)
2046 		mc_dimm_sids[i].state = MC_DIMM_SIDS_REQUESTED;
2047 
2048 	mc_dimm_sids[i].seg_id = bank->seg_id;
2049 
2050 	if (mc_dimm_sids[i].sids == NULL) {
2051 		mc_dimm_sids[i].sids = (dimm_sid_t *)kmem_zalloc(
2052 		    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS), KM_SLEEP);
2053 	}
2054 
2055 	dgrp_no = bank->devgrp_id % NDGRPS;
2056 
2057 	for (j = 0; j < NDIMMS; j++) {
2058 		bank->dimmsidp[j] =
2059 		    &mc_dimm_sids[i].sids[j + (NDIMMS * dgrp_no)];
2060 	}
2061 }
2062 
2063 static int
2064 mc_populate_sid_cache(void)
2065 {
2066 	struct bank_info	*bank;
2067 
2068 	if (&plat_populate_sid_cache == 0)
2069 		return (ENOTSUP);
2070 
2071 	ASSERT(RW_WRITE_HELD(&mcdimmsids_rw));
2072 
2073 	bank = (struct bank_info *)bank_head;
2074 	while (bank != NULL) {
2075 		if (!bank->valid) {
2076 			bank = (struct bank_info *)bank->bank_node.next;
2077 			continue;
2078 		}
2079 
2080 		mc_update_bank(bank);
2081 
2082 		bank = (struct bank_info *)bank->bank_node.next;
2083 	}
2084 
2085 
2086 	/*
2087 	 * Call to the platform layer to populate the cache
2088 	 * with DIMM serial ids.
2089 	 */
2090 	return (plat_populate_sid_cache(mc_dimm_sids, max_entries));
2091 }
2092 
2093 static void
2094 mc_init_sid_cache_thr(void)
2095 {
2096 	ASSERT(mc_dimm_sids == NULL);
2097 
2098 	mutex_enter(&mcdatamutex);
2099 	rw_enter(&mcdimmsids_rw, RW_WRITER);
2100 
2101 	mc_dimm_sids = plat_alloc_sid_cache(&max_entries);
2102 	(void) mc_populate_sid_cache();
2103 
2104 	rw_exit(&mcdimmsids_rw);
2105 	mutex_exit(&mcdatamutex);
2106 }
2107 
2108 static int
2109 mc_init_sid_cache(void)
2110 {
2111 	if (&plat_alloc_sid_cache) {
2112 		(void) thread_create(NULL, 0, mc_init_sid_cache_thr, NULL, 0,
2113 		    &p0, TS_RUN, minclsyspri);
2114 		return (0);
2115 	} else
2116 		return (ENOTSUP);
2117 }
2118 
2119 static int
2120 mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp)
2121 {
2122 	int	i;
2123 
2124 	if (buflen < DIMM_SERIAL_ID_LEN)
2125 		return (ENOSPC);
2126 
2127 	/*
2128 	 * If DIMM serial ids have not been cached yet, tell the
2129 	 * caller to try again.
2130 	 */
2131 	if (!rw_tryenter(&mcdimmsids_rw, RW_READER))
2132 		return (EAGAIN);
2133 
2134 	if (mc_dimm_sids == NULL) {
2135 		rw_exit(&mcdimmsids_rw);
2136 		return (EAGAIN);
2137 	}
2138 
2139 	/*
2140 	 * Find dimm serial id using mcid and dimm #
2141 	 */
2142 	for (i = 0; i < max_entries; i++) {
2143 		if (mc_dimm_sids[i].mcid == mcid)
2144 			break;
2145 	}
2146 	if ((i == max_entries) || (!mc_dimm_sids[i].sids)) {
2147 		rw_exit(&mcdimmsids_rw);
2148 		return (ENOENT);
2149 	}
2150 
2151 	(void) strlcpy(buf, mc_dimm_sids[i].sids[dimm],
2152 	    DIMM_SERIAL_ID_LEN);
2153 	*lenp = strlen(buf);
2154 
2155 	rw_exit(&mcdimmsids_rw);
2156 	return (0);
2157 }
2158