xref: /illumos-gate/usr/src/uts/sun4u/io/mc-us3.c (revision a6e6969cf9cfe2070eae4cd6071f76b0fa4f539f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/stat.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/obpdefs.h>
35 #include <sys/cmn_err.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/open.h>
39 #include <sys/thread.h>
40 #include <sys/cpuvar.h>
41 #include <sys/x_call.h>
42 #include <sys/debug.h>
43 #include <sys/sysmacros.h>
44 #include <sys/ivintr.h>
45 #include <sys/intr.h>
46 #include <sys/intreg.h>
47 #include <sys/autoconf.h>
48 #include <sys/modctl.h>
49 #include <sys/spl.h>
50 #include <sys/async.h>
51 #include <sys/mc.h>
52 #include <sys/mc-us3.h>
53 #include <sys/cpu_module.h>
54 #include <sys/platform_module.h>
55 
56 /*
57  * Function prototypes
58  */
59 
60 static int mc_open(dev_t *, int, int, cred_t *);
61 static int mc_close(dev_t, int, int, cred_t *);
62 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
63 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
64 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
65 
66 /*
67  * Configuration data structures
68  */
69 static struct cb_ops mc_cb_ops = {
70 	mc_open,			/* open */
71 	mc_close,			/* close */
72 	nulldev,			/* strategy */
73 	nulldev,			/* print */
74 	nodev,				/* dump */
75 	nulldev,			/* read */
76 	nulldev,			/* write */
77 	mc_ioctl,			/* ioctl */
78 	nodev,				/* devmap */
79 	nodev,				/* mmap */
80 	nodev,				/* segmap */
81 	nochpoll,			/* poll */
82 	ddi_prop_op,			/* cb_prop_op */
83 	0,				/* streamtab */
84 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
85 	CB_REV,				/* rev */
86 	nodev,				/* cb_aread */
87 	nodev				/* cb_awrite */
88 };
89 
90 static struct dev_ops mc_ops = {
91 	DEVO_REV,			/* rev */
92 	0,				/* refcnt  */
93 	ddi_getinfo_1to1,		/* getinfo */
94 	nulldev,			/* identify */
95 	nulldev,			/* probe */
96 	mc_attach,			/* attach */
97 	mc_detach,			/* detach */
98 	nulldev,			/* reset */
99 	&mc_cb_ops,			/* cb_ops */
100 	(struct bus_ops *)0,		/* bus_ops */
101 	nulldev				/* power */
102 };
103 
104 /*
105  * Driver globals
106  */
107 static void *mcp;
108 static int nmcs = 0;
109 static int seg_id = 0;
110 static int nsegments = 0;
111 static uint64_t memsize = 0;
112 static int maxbanks = 0;
113 
114 static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
115 static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
116 static mc_dlist_t *device_head, *device_tail;
117 
118 static kmutex_t	mcmutex;
119 static kmutex_t	mcdatamutex;
120 
121 static krwlock_t mcdimmsids_rw;
122 
123 /* pointer to cache of DIMM serial ids */
124 static dimm_sid_cache_t	*mc_dimm_sids;
125 static int		max_entries;
126 
127 extern struct mod_ops mod_driverops;
128 
129 static struct modldrv modldrv = {
130 	&mod_driverops,			/* module type, this one is a driver */
131 	"Memory-controller: %I%",	/* module name */
132 	&mc_ops,			/* driver ops */
133 };
134 
135 static struct modlinkage modlinkage = {
136 	MODREV_1,		/* rev */
137 	(void *)&modldrv,
138 	NULL
139 };
140 
141 static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
142     int buflen, int *lenp);
143 static int mc_get_mem_info(int synd_code, uint64_t paddr,
144     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
145     int *segsp, int *banksp, int *mcidp);
146 static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp);
147 static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
148 static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr);
149 static int mc_init_sid_cache(void);
150 static int mc_get_mcregs(struct mc_soft_state *);
151 static void mc_construct(int mc_id, void *dimminfop);
152 static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
153 static void mlayout_del(int mc_id, int delete);
154 static struct seg_info *seg_match_base(u_longlong_t base);
155 static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
156 static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
157 static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
158 static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);
159 static int mc_populate_sid_cache(void);
160 static int mc_get_sid_cache_index(int mcid);
161 static void mc_update_bank(struct bank_info *bank);
162 
163 #pragma weak p2get_mem_unum
164 #pragma weak p2get_mem_info
165 #pragma weak p2get_mem_sid
166 #pragma weak p2get_mem_offset
167 #pragma	weak p2get_mem_addr
168 #pragma weak p2init_sid_cache
169 #pragma weak plat_add_mem_unum_label
170 #pragma weak plat_alloc_sid_cache
171 #pragma weak plat_populate_sid_cache
172 
173 #define	QWORD_SIZE		144
174 #define	QWORD_SIZE_BYTES	(QWORD_SIZE / 8)
175 
176 /*
177  * These are the module initialization routines.
178  */
179 
180 int
181 _init(void)
182 {
183 	int error;
184 
185 	if ((error = ddi_soft_state_init(&mcp,
186 	    sizeof (struct mc_soft_state), 1)) != 0)
187 		return (error);
188 
189 	error =  mod_install(&modlinkage);
190 	if (error == 0) {
191 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
192 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
193 		rw_init(&mcdimmsids_rw, NULL, RW_DRIVER, NULL);
194 	}
195 
196 	return (error);
197 }
198 
199 int
200 _fini(void)
201 {
202 	int error;
203 
204 	if ((error = mod_remove(&modlinkage)) != 0)
205 		return (error);
206 
207 	ddi_soft_state_fini(&mcp);
208 	mutex_destroy(&mcmutex);
209 	mutex_destroy(&mcdatamutex);
210 	rw_destroy(&mcdimmsids_rw);
211 
212 	if (mc_dimm_sids)
213 		kmem_free(mc_dimm_sids, sizeof (dimm_sid_cache_t) *
214 		    max_entries);
215 
216 	return (0);
217 }
218 
219 int
220 _info(struct modinfo *modinfop)
221 {
222 	return (mod_info(&modlinkage, modinfop));
223 }
224 
225 static int
226 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
227 {
228 	struct mc_soft_state *softsp;
229 	struct dimm_info *dimminfop;
230 	int instance, len, err;
231 
232 	/* get the instance of this devi */
233 	instance = ddi_get_instance(devi);
234 
235 	switch (cmd) {
236 	case DDI_ATTACH:
237 		break;
238 
239 	case DDI_RESUME:
240 		/* get the soft state pointer for this device node */
241 		softsp = ddi_get_soft_state(mcp, instance);
242 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
243 		    instance));
244 		/*
245 		 * During resume, the source and target board's bank_infos
246 		 * need to be updated with the new mc MADR values.  This is
247 		 * implemented with existing functionality by first removing
248 		 * the props and allocated data structs, and then adding them
249 		 * back in.
250 		 */
251 		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
252 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
253 		    MEM_CFG_PROP_NAME) == 1) {
254 			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
255 			    MEM_CFG_PROP_NAME);
256 		}
257 		mlayout_del(softsp->portid, 0);
258 		if (mc_get_mcregs(softsp) == -1) {
259 			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
260 			    instance);
261 		}
262 		return (DDI_SUCCESS);
263 
264 	default:
265 		return (DDI_FAILURE);
266 	}
267 
268 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
269 		return (DDI_FAILURE);
270 
271 	softsp = ddi_get_soft_state(mcp, instance);
272 
273 	/* Set the dip in the soft state */
274 	softsp->dip = devi;
275 
276 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
277 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
278 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
279 		    instance, "portid"));
280 		goto bad;
281 	}
282 
283 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
284 	    instance, softsp->portid, CPU->cpu_id));
285 
286 	/* map in the registers for this device. */
287 	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
288 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
289 		    instance));
290 		goto bad;
291 	}
292 
293 	/*
294 	 * Get the label of dimms and pin routing information at memory-layout
295 	 * property if the memory controller is enabled.
296 	 *
297 	 * Basically every memory-controller node on every machine should
298 	 * have one of these properties unless the memory controller is
299 	 * physically not capable of having memory attached to it, e.g.
300 	 * Excalibur's slave processor.
301 	 */
302 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
303 	    "memory-layout", (caddr_t)&dimminfop, &len);
304 	if (err == DDI_PROP_SUCCESS) {
305 		/*
306 		 * Set the pointer and size of property in the soft state
307 		 */
308 		softsp->memlayoutp = dimminfop;
309 		softsp->size = len;
310 	} else if (err == DDI_PROP_NOT_FOUND) {
311 		/*
312 		 * This is a disable MC. Clear out the pointer and size
313 		 * of property in the soft state
314 		 */
315 		softsp->memlayoutp = NULL;
316 		softsp->size = 0;
317 	} else {
318 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
319 		    instance, dimminfop));
320 		goto bad2;
321 	}
322 
323 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
324 	    instance, dimminfop, *(uint64_t *)dimminfop, len));
325 
326 	/* Get MC registers and construct all needed data structure */
327 	if (mc_get_mcregs(softsp) == -1)
328 		goto bad1;
329 
330 	mutex_enter(&mcmutex);
331 	if (nmcs == 1) {
332 		if (&p2get_mem_unum)
333 			p2get_mem_unum = mc_get_mem_unum;
334 		if (&p2get_mem_info)
335 			p2get_mem_info = mc_get_mem_info;
336 		if (&p2get_mem_sid)
337 			p2get_mem_sid = mc_get_mem_sid;
338 		if (&p2get_mem_offset)
339 			p2get_mem_offset = mc_get_mem_offset;
340 		if (&p2get_mem_addr)
341 			p2get_mem_addr = mc_get_mem_addr;
342 		if (&p2init_sid_cache)
343 			p2init_sid_cache = mc_init_sid_cache;
344 	}
345 
346 	mutex_exit(&mcmutex);
347 
348 	/*
349 	 * Update DIMM serial id information if the DIMM serial id
350 	 * cache has already been initialized.
351 	 */
352 	if (mc_dimm_sids) {
353 		rw_enter(&mcdimmsids_rw, RW_WRITER);
354 		(void) mc_populate_sid_cache();
355 		rw_exit(&mcdimmsids_rw);
356 	}
357 
358 	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
359 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
360 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
361 		    " failed \n"));
362 		goto bad1;
363 	}
364 
365 	ddi_report_dev(devi);
366 	return (DDI_SUCCESS);
367 
368 bad1:
369 	/* release all allocated data struture for this MC */
370 	mlayout_del(softsp->portid, 0);
371 	if (softsp->memlayoutp != NULL)
372 		kmem_free(softsp->memlayoutp, softsp->size);
373 
374 	/* remove the libdevinfo property */
375 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
376 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
377 	    MEM_CFG_PROP_NAME) == 1) {
378 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
379 			MEM_CFG_PROP_NAME);
380 	}
381 
382 bad2:
383 	/* unmap the registers for this device. */
384 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
385 
386 bad:
387 	ddi_soft_state_free(mcp, instance);
388 	return (DDI_FAILURE);
389 }
390 
391 /* ARGSUSED */
392 static int
393 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
394 {
395 	int instance;
396 	struct mc_soft_state *softsp;
397 
398 	/* get the instance of this devi */
399 	instance = ddi_get_instance(devi);
400 
401 	/* get the soft state pointer for this device node */
402 	softsp = ddi_get_soft_state(mcp, instance);
403 
404 	switch (cmd) {
405 	case DDI_SUSPEND:
406 		return (DDI_SUCCESS);
407 
408 	case DDI_DETACH:
409 		break;
410 
411 	default:
412 		return (DDI_FAILURE);
413 	}
414 
415 	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
416 	    instance, softsp->portid, softsp->memlayoutp));
417 
418 	/* remove the libdevinfo property */
419 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
420 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
421 	    MEM_CFG_PROP_NAME) == 1) {
422 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
423 			MEM_CFG_PROP_NAME);
424 	}
425 
426 	/* release all allocated data struture for this MC */
427 	mlayout_del(softsp->portid, 1);
428 	if (softsp->memlayoutp != NULL)
429 		kmem_free(softsp->memlayoutp, softsp->size);
430 
431 	/* unmap the registers */
432 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
433 
434 	mutex_enter(&mcmutex);
435 	if (nmcs == 0) {
436 		if (&p2get_mem_unum)
437 			p2get_mem_unum = NULL;
438 		if (&p2get_mem_info)
439 			p2get_mem_info = NULL;
440 		if (&p2get_mem_sid)
441 			p2get_mem_sid = NULL;
442 		if (&p2get_mem_offset)
443 			p2get_mem_offset = NULL;
444 		if (&p2get_mem_addr)
445 			p2get_mem_addr = NULL;
446 		if (&p2init_sid_cache)
447 			p2init_sid_cache = NULL;
448 	}
449 
450 	mutex_exit(&mcmutex);
451 
452 	ddi_remove_minor_node(devi, NULL);
453 
454 	/* free up the soft state */
455 	ddi_soft_state_free(mcp, instance);
456 
457 	return (DDI_SUCCESS);
458 }
459 
460 /* ARGSUSED */
461 static int
462 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
463 {
464 
465 	/* verify that otyp is appropriate */
466 	if (otyp != OTYP_CHR) {
467 		return (EINVAL);
468 	}
469 
470 	return (0);
471 }
472 
473 /* ARGSUSED */
474 static int
475 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
476 {
477 	return (0);
478 }
479 
480 /*
481  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
482  * MCIOC_CTRLCONF, MCIOC_CONTROL.
483  *
484  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
485  * associated with various length struct. If given number is less than the
486  * number in kernel, update the number and return EINVAL so that user could
487  * allocate enough space for it.
488  *
489  */
490 
491 /* ARGSUSED */
492 static int
493 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
494 	int *rval_p)
495 {
496 	size_t	size;
497 	struct mc_memconf mcmconf;
498 	struct mc_memory *mcmem, mcmem_in;
499 	struct mc_segment *mcseg, mcseg_in;
500 	struct mc_bank mcbank;
501 	struct mc_devgrp mcdevgrp;
502 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
503 	struct mc_control *mccontrol, mccontrol_in;
504 	struct seg_info *seg = NULL;
505 	struct bank_info *bank = NULL;
506 	struct dgrp_info *dgrp = NULL;
507 	struct mctrl_info *mcport;
508 	mc_dlist_t *mctrl;
509 	int i, status = 0;
510 	cpu_t *cpu;
511 
512 	switch (cmd) {
513 	case MCIOC_MEMCONF:
514 		mutex_enter(&mcdatamutex);
515 
516 		mcmconf.nmcs = nmcs;
517 		mcmconf.nsegments = nsegments;
518 		mcmconf.nbanks = maxbanks;
519 		mcmconf.ndevgrps = NDGRPS;
520 		mcmconf.ndevs = NDIMMS;
521 		mcmconf.len_dev = MAX_DEVLEN;
522 		mcmconf.xfer_size = TRANSFER_SIZE;
523 
524 		mutex_exit(&mcdatamutex);
525 
526 		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
527 			return (EFAULT);
528 		return (0);
529 
530 	/*
531 	 * input: nsegments and allocate space for various length of segmentids
532 	 *
533 	 * return    0: size, number of segments, and all segment ids,
534 	 *		where glocal and local ids are identical.
535 	 *	EINVAL: if the given nsegments is less than that in kernel and
536 	 *		nsegments of struct will be updated.
537 	 *	EFAULT: if other errors in kernel.
538 	 */
539 	case MCIOC_MEM:
540 		if (copyin((void *)arg, &mcmem_in,
541 		    sizeof (struct mc_memory)) != 0)
542 			return (EFAULT);
543 
544 		mutex_enter(&mcdatamutex);
545 		if (mcmem_in.nsegments < nsegments) {
546 			mcmem_in.nsegments = nsegments;
547 			if (copyout(&mcmem_in, (void *)arg,
548 			    sizeof (struct mc_memory)))
549 				status = EFAULT;
550 			else
551 				status = EINVAL;
552 
553 			mutex_exit(&mcdatamutex);
554 			return (status);
555 		}
556 
557 		size = sizeof (struct mc_memory) + (nsegments - 1) *
558 		    sizeof (mcmem->segmentids[0]);
559 		mcmem = kmem_zalloc(size, KM_SLEEP);
560 
561 		mcmem->size = memsize;
562 		mcmem->nsegments = nsegments;
563 		seg = (struct seg_info *)seg_head;
564 		for (i = 0; i < nsegments; i++) {
565 			ASSERT(seg != NULL);
566 			mcmem->segmentids[i].globalid = seg->seg_node.id;
567 			mcmem->segmentids[i].localid = seg->seg_node.id;
568 			seg = (struct seg_info *)seg->seg_node.next;
569 		}
570 		mutex_exit(&mcdatamutex);
571 
572 		if (copyout(mcmem, (void *)arg, size))
573 			status = EFAULT;
574 
575 		kmem_free(mcmem, size);
576 		return (status);
577 
578 	/*
579 	 * input: id, nbanks and allocate space for various length of bankids
580 	 *
581 	 * return    0: base, size, number of banks, and all bank ids,
582 	 *		where global id is unique of all banks and local id
583 	 *		is only unique for mc.
584 	 *	EINVAL: either id isn't found or if given nbanks is less than
585 	 *		that in kernel and nbanks of struct will be updated.
586 	 *	EFAULT: if other errors in kernel.
587 	 */
588 	case MCIOC_SEG:
589 
590 		if (copyin((void *)arg, &mcseg_in,
591 		    sizeof (struct mc_segment)) != 0)
592 			return (EFAULT);
593 
594 		mutex_enter(&mcdatamutex);
595 		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
596 		    seg_head)) == NULL) {
597 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
598 			    "id %d\n", mcseg_in.id));
599 			mutex_exit(&mcdatamutex);
600 			return (EFAULT);
601 		}
602 
603 		if (mcseg_in.nbanks < seg->nbanks) {
604 			mcseg_in.nbanks = seg->nbanks;
605 			if (copyout(&mcseg_in, (void *)arg,
606 			    sizeof (struct mc_segment)))
607 				status = EFAULT;
608 			else
609 				status = EINVAL;
610 
611 			mutex_exit(&mcdatamutex);
612 			return (status);
613 		}
614 
615 		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
616 		    sizeof (mcseg->bankids[0]);
617 		mcseg = kmem_zalloc(size, KM_SLEEP);
618 
619 		mcseg->id = seg->seg_node.id;
620 		mcseg->ifactor = seg->ifactor;
621 		mcseg->base = seg->base;
622 		mcseg->size = seg->size;
623 		mcseg->nbanks = seg->nbanks;
624 
625 		bank = seg->hb_inseg;
626 
627 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
628 		    seg->nbanks, seg, bank));
629 
630 		i = 0;
631 		while (bank != NULL) {
632 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
633 			    i, bank->bank_node.id));
634 			mcseg->bankids[i].globalid = bank->bank_node.id;
635 			mcseg->bankids[i++].localid =
636 			    bank->local_id;
637 			bank = bank->n_inseg;
638 		}
639 		ASSERT(i == seg->nbanks);
640 		mutex_exit(&mcdatamutex);
641 
642 		if (copyout(mcseg, (void *)arg, size))
643 			status = EFAULT;
644 
645 		kmem_free(mcseg, size);
646 		return (status);
647 
648 	/*
649 	 * input: id
650 	 *
651 	 * return    0: mask, match, size, and devgrpid,
652 	 *		where global id is unique of all devgrps and local id
653 	 *		is only unique for mc.
654 	 *	EINVAL: if id isn't found
655 	 *	EFAULT: if other errors in kernel.
656 	 */
657 	case MCIOC_BANK:
658 		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
659 			return (EFAULT);
660 
661 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
662 
663 		mutex_enter(&mcdatamutex);
664 
665 		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
666 		    bank_head)) == NULL) {
667 			mutex_exit(&mcdatamutex);
668 			return (EINVAL);
669 		}
670 
671 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
672 		    bank->bank_node.id, bank, bank->valid));
673 
674 		/*
675 		 * If (Physic Address & MASK) == MATCH, Physic Address is
676 		 * located at this bank. The lower physical address bits
677 		 * are at [9-6].
678 		 */
679 		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
680 		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
681 		mcbank.match = bank->lm << MADR_LPA_SHIFT;
682 		mcbank.size = bank->size;
683 		mcbank.devgrpid.globalid = bank->devgrp_id;
684 		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;
685 
686 		mutex_exit(&mcdatamutex);
687 
688 		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
689 			return (EFAULT);
690 		return (0);
691 
692 	/*
693 	 * input:id and allocate space for various length of deviceids
694 	 *
695 	 * return    0: size and number of devices.
696 	 *	EINVAL: id isn't found
697 	 *	EFAULT: if other errors in kernel.
698 	 */
699 	case MCIOC_DEVGRP:
700 
701 		if (copyin((void *)arg, &mcdevgrp,
702 		    sizeof (struct mc_devgrp)) != 0)
703 			return (EFAULT);
704 
705 		mutex_enter(&mcdatamutex);
706 		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
707 		    dgrp_head)) == NULL) {
708 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
709 			    "%d\n", mcdevgrp.id));
710 			mutex_exit(&mcdatamutex);
711 			return (EINVAL);
712 		}
713 
714 		mcdevgrp.ndevices = dgrp->ndevices;
715 		mcdevgrp.size = dgrp->size;
716 
717 		mutex_exit(&mcdatamutex);
718 
719 		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
720 			status = EFAULT;
721 
722 		return (status);
723 
724 	/*
725 	 * input: nmcs and allocate space for various length of mcids
726 	 *
727 	 * return    0: number of mc, and all mcids,
728 	 *		where glocal and local ids are identical.
729 	 *	EINVAL: if the given nmcs is less than that in kernel and
730 	 *		nmcs of struct will be updated.
731 	 *	EFAULT: if other errors in kernel.
732 	 */
733 	case MCIOC_CTRLCONF:
734 		if (copyin((void *)arg, &mcctrlconf_in,
735 		    sizeof (struct mc_ctrlconf)) != 0)
736 			return (EFAULT);
737 
738 		mutex_enter(&mcdatamutex);
739 		if (mcctrlconf_in.nmcs < nmcs) {
740 			mcctrlconf_in.nmcs = nmcs;
741 			if (copyout(&mcctrlconf_in, (void *)arg,
742 			    sizeof (struct mc_ctrlconf)))
743 				status = EFAULT;
744 			else
745 				status = EINVAL;
746 
747 			mutex_exit(&mcdatamutex);
748 			return (status);
749 		}
750 
751 		/*
752 		 * Cannot just use the size of the struct because of the various
753 		 * length struct
754 		 */
755 		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
756 		    sizeof (mcctrlconf->mcids[0]));
757 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
758 
759 		mcctrlconf->nmcs = nmcs;
760 
761 		/* Get all MC ids and add to mcctrlconf */
762 		mctrl = mctrl_head;
763 		i = 0;
764 		while (mctrl != NULL) {
765 			mcctrlconf->mcids[i].globalid = mctrl->id;
766 			mcctrlconf->mcids[i].localid = mctrl->id;
767 			i++;
768 			mctrl = mctrl->next;
769 		}
770 		ASSERT(i == nmcs);
771 
772 		mutex_exit(&mcdatamutex);
773 
774 		if (copyout(mcctrlconf, (void *)arg, size))
775 			status = EFAULT;
776 
777 		kmem_free(mcctrlconf, size);
778 		return (status);
779 
780 	/*
781 	 * input:id, ndevgrps and allocate space for various length of devgrpids
782 	 *
783 	 * return    0: number of devgrp, and all devgrpids,
784 	 *		is unique of all devgrps and local id is only unique
785 	 *		for mc.
786 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
787 	 *		less than that in kernel and ndevgrps of struct will
788 	 *		be updated.
789 	 *	EFAULT: if other errors in kernel.
790 	 */
791 	case MCIOC_CONTROL:
792 		if (copyin((void *)arg, &mccontrol_in,
793 		    sizeof (struct mc_control)) != 0)
794 			return (EFAULT);
795 
796 		mutex_enter(&mcdatamutex);
797 		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
798 		    mctrl_head)) == NULL) {
799 			mutex_exit(&mcdatamutex);
800 			return (EINVAL);
801 		}
802 
803 		/*
804 		 * mcport->ndevgrps zero means Memory Controller is disable.
805 		 */
806 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
807 		    (mcport->ndevgrps == 0)) {
808 			mccontrol_in.ndevgrps = mcport->ndevgrps;
809 			if (copyout(&mccontrol_in, (void *)arg,
810 			    sizeof (struct mc_control)))
811 				status = EFAULT;
812 			else if (mcport->ndevgrps != 0)
813 				status = EINVAL;
814 
815 			mutex_exit(&mcdatamutex);
816 			return (status);
817 		}
818 
819 		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
820 		    sizeof (mccontrol->devgrpids[0]);
821 		mccontrol = kmem_zalloc(size, KM_SLEEP);
822 
823 		mccontrol->id = mcport->mctrl_node.id;
824 		mccontrol->ndevgrps = mcport->ndevgrps;
825 		for (i = 0; i < mcport->ndevgrps; i++) {
826 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
827 			mccontrol->devgrpids[i].localid =
828 			    mcport->devgrpids[i] % NDGRPS;
829 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
830 			    *(uint64_t *)&mccontrol->devgrpids[i]));
831 		}
832 		mutex_exit(&mcdatamutex);
833 
834 		if (copyout(mccontrol, (void *)arg, size))
835 			status = EFAULT;
836 
837 		kmem_free(mccontrol, size);
838 		return (status);
839 
840 	/*
841 	 * input:id
842 	 *
843 	 * return    0: CPU flushed successfully.
844 	 *	EINVAL: the id wasn't found
845 	 */
846 	case MCIOC_ECFLUSH:
847 		mutex_enter(&cpu_lock);
848 		cpu = cpu_get((processorid_t)arg);
849 		mutex_exit(&cpu_lock);
850 		if (cpu == NULL)
851 			return (EINVAL);
852 
853 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
854 
855 		return (0);
856 
857 	default:
858 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
859 		return (EFAULT);
860 	}
861 }
862 
863 /*
864  * Get Memory Address Decoding Registers and construct list.
865  * flag is to workaround Cheetah's restriction where register cannot be mapped
866  * if port id(MC registers on it) == cpu id(process is running on it).
867  */
868 static int
869 mc_get_mcregs(struct mc_soft_state *softsp)
870 {
871 	int i;
872 	int err = 0;
873 	uint64_t madreg;
874 	uint64_t ma_reg_array[NBANKS];	/* there are NBANKS of madrs */
875 
876 	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
877 	mc_construct(softsp->portid, softsp->memlayoutp);
878 
879 	/*
880 	 * If memlayoutp is NULL, the Memory Controller is disable, and
881 	 * doesn't need to create any bank and segment.
882 	 */
883 	if (softsp->memlayoutp == NULL)
884 		goto exit;
885 
886 	/*
887 	 * Get the content of 4 Memory Address Decoding Registers, and
888 	 * construct lists of logical banks and segments.
889 	 */
890 	for (i = 0; i < NBANKS; i++) {
891 		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
892 		    "cpu=%d\n", softsp->mc_base, softsp->portid, CPU->cpu_id));
893 
894 		kpreempt_disable();
895 		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
896 			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
897 		else
898 			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
899 			    (i * REGOFFSET)));
900 		kpreempt_enable();
901 
902 		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
903 		    "reg=0x%lx\n", softsp->memlayoutp, madreg));
904 
905 		ma_reg_array[i] = madreg;
906 
907 		if ((err = mlayout_add(softsp->portid, i, madreg,
908 		    softsp->memlayoutp)) == -1)
909 			break;
910 	}
911 
912 	/*
913 	 * Create the logical bank property for this mc node. This
914 	 * property is an encoded array of the madr for each logical
915 	 * bank (there are NBANKS of these).
916 	 */
917 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
918 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
919 	    MEM_CFG_PROP_NAME) != 1) {
920 		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
921 			DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
922 			(caddr_t)&ma_reg_array, sizeof (ma_reg_array));
923 	}
924 
925 exit:
926 	if (!err) {
927 		mutex_enter(&mcdatamutex);
928 		nmcs++;
929 		mutex_exit(&mcdatamutex);
930 	}
931 	return (err);
932 }
933 
934 /*
935  * Translate a <DIMM, offset> pair to a physical address.
936  */
937 static int
938 mc_offset_to_addr(struct seg_info *seg,
939     struct bank_info *bank, uint64_t off, uint64_t *addr)
940 {
941 	uint64_t base, size, line, remainder;
942 	uint32_t ifactor;
943 
944 	/*
945 	 * Compute the half-dimm size in bytes.
946 	 * Note that bank->size represents the number of data bytes,
947 	 * and does not include the additional bits used for ecc, mtag,
948 	 * and mtag ecc information in each 144-bit checkword.
949 	 * For calculating the offset to a checkword we need the size
950 	 * including the additional 8 bytes for each 64 data bytes of
951 	 * a cache line.
952 	 */
953 	size = ((bank->size / 4) / 64) * 72;
954 
955 	/*
956 	 * Check if the offset is within this bank. This depends on the position
957 	 * of the bank, i.e., whether it is the front bank or the back bank.
958 	 */
959 	base = size * bank->pos;
960 
961 	if ((off < base) || (off >= (base + size)))
962 		return (-1);
963 
964 	/*
965 	 * Compute the offset within the half-dimm.
966 	 */
967 	off -= base;
968 
969 	/*
970 	 * Compute the line within the half-dimm. This is the same as the line
971 	 * within the bank since each DIMM in a bank contributes uniformly
972 	 * 144 bits (18 bytes) to a cache line.
973 	 */
974 	line = off / QWORD_SIZE_BYTES;
975 
976 	remainder = off % QWORD_SIZE_BYTES;
977 
978 	/*
979 	 * Compute the line within the segment.
980 	 * The bank->lm field indicates the order in which cache lines are
981 	 * distributed across the banks of a segment (See the Cheetah PRM).
982 	 * The interleave factor the bank is programmed with is used instead
983 	 * of the segment interleave factor since a segment can be composed
984 	 * of banks with different interleave factors if the banks are not
985 	 * uniform in size.
986 	 */
987 	ifactor = (bank->lk ^ 0xF) + 1;
988 	line = (line * ifactor) + bank->lm;
989 
990 	/*
991 	 * Compute the physical address assuming that there are 64 data bytes
992 	 * in a cache line.
993 	 */
994 	*addr = (line << 6) + seg->base;
995 	*addr += remainder * 16;
996 
997 	return (0);
998 }
999 
1000 /*
1001  * Translate a physical address to a <DIMM, offset> pair.
1002  */
1003 static void
1004 mc_addr_to_offset(struct seg_info *seg,
1005     struct bank_info *bank, uint64_t addr, uint64_t *off)
1006 {
1007 	uint64_t base, size, line, remainder;
1008 	uint32_t ifactor;
1009 
1010 	/*
1011 	 * Compute the line within the segment assuming that there are 64 data
1012 	 * bytes in a cache line.
1013 	 */
1014 	line = (addr - seg->base) / 64;
1015 
1016 	/*
1017 	 * The lm (lower match) field from the Memory Address Decoding Register
1018 	 * for this bank determines which lines within a memory segment this
1019 	 * bank should respond to.  These are the actual address bits the
1020 	 * interleave is done over (See the Cheetah PRM).
1021 	 * In other words, the lm field indicates the order in which the cache
1022 	 * lines are distributed across the banks of a segment, and thusly it
1023 	 * can be used to compute the line within this bank. This is the same as
1024 	 * the line within the half-dimm. This is because each DIMM in a bank
1025 	 * contributes uniformly to every cache line.
1026 	 */
1027 	ifactor = (bank->lk ^ 0xF) + 1;
1028 	line = (line - bank->lm)/ifactor;
1029 
1030 	/*
1031 	 * Compute the offset within the half-dimm. This depends on whether
1032 	 * or not the bank is a front logical bank or a back logical bank.
1033 	 */
1034 	*off = line * QWORD_SIZE_BYTES;
1035 
1036 	/*
1037 	 * Compute the half-dimm size in bytes.
1038 	 * Note that bank->size represents the number of data bytes,
1039 	 * and does not include the additional bits used for ecc, mtag,
1040 	 * and mtag ecc information in each 144-bit quadword.
1041 	 * For calculating the offset to a checkword we need the size
1042 	 * including the additional 8 bytes for each 64 data bytes of
1043 	 * a cache line.
1044 	 */
1045 	size = ((bank->size / 4) / 64) * 72;
1046 
1047 	/*
1048 	 * Compute the offset within the dimm to the nearest line. This depends
1049 	 * on whether or not the bank is a front logical bank or a back logical
1050 	 * bank.
1051 	 */
1052 	base = size * bank->pos;
1053 	*off += base;
1054 
1055 	remainder = (addr - seg->base) % 64;
1056 	remainder /= 16;
1057 	*off += remainder;
1058 }
1059 
/*
 * A cache line is composed of four quadwords with the associated ECC, the
 * MTag along with its associated ECC. This is depicted below:
 *
 * |                    Data                    |   ECC   | Mtag |MTag ECC|
 *  127                                         0 8       0 2    0 3      0
 *
 * synd_code will be mapped as the following order to mc_get_mem_unum.
 *  143                                         16        7      4        0
 *
 * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
 *  575         432 431         288 287         144 143		   0
 *
 * dimm table: each bit of a cache line needs two bits to represent one of
 *      four dimms, so the table needs 144 bytes (576 * 2 / 8). The content
 *      is in big-endian order, i.e. dimm_table[0] represents bits 572 to 575.
 *
 * pin table: each bit of a cache line needs one byte to represent the pin
 *      position, whose maximum is 230, so the table needs 576 bytes. The
 *      table index is the same as the bit position in a cache line, i.e.
 *      pin_table[0] represents bit 0, Mtag ECC 0 of Quadword 3.
 *
 * This is a mapping from syndrome code to QuadWord Logical layout at Safari.
 * Referring to Figure 3-4, Excalibur Architecture Manual.
 * This table could be moved to cheetah.c if other platform teams agree with
 * the bit layout at QuadWord.
 */

static uint8_t qwordmap[] = {
	/* indices 0-127: positions 16-143 (data field in the layout above) */
	 16,  17,  18,  19,  20,  21,  22,  23,
	 24,  25,  26,  27,  28,  29,  30,  31,
	 32,  33,  34,  35,  36,  37,  38,  39,
	 40,  41,  42,  43,  44,  45,  46,  47,
	 48,  49,  50,  51,  52,  53,  54,  55,
	 56,  57,  58,  59,  60,  61,  62,  63,
	 64,  65,  66,  67,  68,  69,  70,  71,
	 72,  73,  74,  75,  76,  77,  78,  79,
	 80,  81,  82,  83,  84,  85,  86,  87,
	 88,  89,  90,  91,  92,  93,  94,  95,
	 96,  97,  98,  99, 100, 101, 102, 103,
	104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	/* indices 128-136: positions 7-15 (ECC field in the layout above) */
	  7,   8,   9,  10,  11,  12,  13,  14,  15,
	/* indices 137-139: positions 4-6 (Mtag field in the layout above) */
	  4,   5,   6,
	/* indices 140-143: positions 0-3 (Mtag ECC field in the layout above) */
	  0,   1,   2,   3
};
1100 
1101 
1102 /* ARGSUSED */
1103 static int
1104 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1105 {
1106 	int i, upper_pa, lower_pa, dimmoffset;
1107 	int quadword, pos_cacheline, position, index, idx4dimm;
1108 	int qwlayout = synd_code;
1109 	short offset, data;
1110 	char unum[UNUM_NAMLEN];
1111 	struct dimm_info *dimmp;
1112 	struct pin_info *pinp;
1113 	struct bank_info *bank;
1114 
1115 	/*
1116 	 * Enforce old Openboot requirement for synd code, either a single-bit
1117 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1118 	 */
1119 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1120 		return (EINVAL);
1121 
1122 	unum[0] = '\0';
1123 
1124 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1125 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1126 
1127 	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));
1128 
1129 	/*
1130 	 * Scan all logical banks to get one responding to the physical
1131 	 * address. Then compute the index to look up dimm and pin tables
1132 	 * to generate the unum.
1133 	 */
1134 	mutex_enter(&mcdatamutex);
1135 	bank = (struct bank_info *)bank_head;
1136 	while (bank != NULL) {
1137 		int bankid, mcid, bankno_permc;
1138 
1139 		bankid = bank->bank_node.id;
1140 		bankno_permc = bankid % NBANKS;
1141 		mcid = bankid / NBANKS;
1142 
1143 		/*
1144 		 * The Address Decoding logic decodes the different fields
1145 		 * in the Memory Address Decoding register to determine
1146 		 * whether a particular logical bank should respond to a
1147 		 * physical address.
1148 		 */
1149 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1150 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1151 			bank = (struct bank_info *)bank->bank_node.next;
1152 			continue;
1153 		}
1154 
1155 		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;
1156 
1157 		dimmp = (struct dimm_info *)bank->dimminfop;
1158 		ASSERT(dimmp != NULL);
1159 
1160 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1161 			/*
1162 			 * single-bit error handling, we can identify specific
1163 			 * DIMM.
1164 			 */
1165 
1166 			pinp = (struct pin_info *)&dimmp->data[0];
1167 
1168 			if (!dimmp->sym_flag)
1169 				pinp++;
1170 
1171 			quadword = (paddr & 0x3f) / 16;
1172 			/* or quadword = (paddr >> 4) % 4; */
1173 			pos_cacheline = ((3 - quadword) * QWORD_SIZE) +
1174 			    qwordmap[qwlayout];
1175 			position = 575 - pos_cacheline;
1176 			index = position * 2 / 8;
1177 			offset = position % 4;
1178 
1179 			/*
1180 			 * Trade-off: We couldn't add pin number to
1181 			 * unum string because statistic number
1182 			 * pumps up at the corresponding dimm not pin.
1183 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1184 			 * pinp->pintable[pos_cacheline]);
1185 			 */
1186 			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
1187 			    (uint_t)pinp->pintable[pos_cacheline]));
1188 			data = pinp->dimmtable[index];
1189 			idx4dimm = (data >> ((3 - offset) * 2)) & 3;
1190 
1191 			(void) strncpy(unum,
1192 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1193 			    UNUM_NAMLEN);
1194 			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
1195 			/*
1196 			 * platform hook for adding label information to unum.
1197 			 */
1198 			mc_add_mem_unum_label(unum, mcid, bankno_permc,
1199 			    idx4dimm);
1200 		} else {
1201 			char *p = unum;
1202 			size_t res = UNUM_NAMLEN;
1203 
1204 			/*
1205 			 * multi-bit error handling, we can only identify
1206 			 * bank of DIMMs.
1207 			 */
1208 
1209 			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
1210 				(void) snprintf(p, res, "%s%s",
1211 				    i == 0 ? "" : " ",
1212 				    (char *)dimmp->label[dimmoffset + i]);
1213 				res -= strlen(p);
1214 				p += strlen(p);
1215 			}
1216 
1217 			/*
1218 			 * platform hook for adding label information
1219 			 * to unum.
1220 			 */
1221 			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
1222 		}
1223 		mutex_exit(&mcdatamutex);
1224 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1225 		    (strlen(unum) >= buflen)) {
1226 			return (ENOSPC);
1227 		} else {
1228 			(void) strncpy(buf, unum, buflen);
1229 			*lenp = strlen(buf);
1230 			return (0);
1231 		}
1232 	}	/* end of while loop for logical bank list */
1233 
1234 	mutex_exit(&mcdatamutex);
1235 	return (ENXIO);
1236 }
1237 
1238 /* ARGSUSED */
1239 static int
1240 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
1241 {
1242 	int upper_pa, lower_pa;
1243 	struct bank_info *bank;
1244 	struct seg_info *seg;
1245 
1246 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1247 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1248 
1249 	/*
1250 	 * Scan all logical banks to get one responding to the physical
1251 	 * address.
1252 	 */
1253 	mutex_enter(&mcdatamutex);
1254 	bank = (struct bank_info *)bank_head;
1255 	while (bank != NULL) {
1256 		/*
1257 		 * The Address Decoding logic decodes the different fields
1258 		 * in the Memory Address Decoding register to determine
1259 		 * whether a particular logical bank should respond to a
1260 		 * physical address.
1261 		 */
1262 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1263 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1264 			bank = (struct bank_info *)bank->bank_node.next;
1265 			continue;
1266 		}
1267 
1268 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1269 		ASSERT(seg != NULL);
1270 		ASSERT(paddr >= seg->base);
1271 
1272 		mc_addr_to_offset(seg, bank, paddr, offp);
1273 
1274 		mutex_exit(&mcdatamutex);
1275 		return (0);
1276 	}
1277 
1278 	mutex_exit(&mcdatamutex);
1279 	return (ENXIO);
1280 }
1281 
1282 /*
1283  * Translate a DIMM <id, offset> pair to a physical address.
1284  */
1285 static int
1286 mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr)
1287 {
1288 	struct seg_info *seg;
1289 	struct bank_info *bank;
1290 	int first_seg_id;
1291 	int i, found;
1292 
1293 	ASSERT(sid != NULL);
1294 
1295 	mutex_enter(&mcdatamutex);
1296 
1297 	rw_enter(&mcdimmsids_rw, RW_READER);
1298 
1299 	/*
1300 	 * If DIMM serial ids have not been cached yet, tell the
1301 	 * caller to try again.
1302 	 */
1303 	if (mc_dimm_sids == NULL) {
1304 		rw_exit(&mcdimmsids_rw);
1305 		return (EAGAIN);
1306 	}
1307 
1308 	for (i = 0; i < max_entries; i++) {
1309 		if (mc_dimm_sids[i].mcid == mcid)
1310 			break;
1311 	}
1312 
1313 	if (i == max_entries) {
1314 		rw_exit(&mcdimmsids_rw);
1315 		mutex_exit(&mcdatamutex);
1316 		return (ENODEV);
1317 	}
1318 
1319 	first_seg_id = mc_dimm_sids[i].seg_id;
1320 
1321 	seg = (struct seg_info *)mc_node_get(first_seg_id, seg_head);
1322 
1323 	rw_exit(&mcdimmsids_rw);
1324 
1325 	if (seg == NULL) {
1326 		mutex_exit(&mcdatamutex);
1327 		return (ENODEV);
1328 	}
1329 
1330 	found = 0;
1331 
1332 	for (bank = seg->hb_inseg; bank; bank = bank->n_inseg) {
1333 		ASSERT(bank->valid);
1334 
1335 		for (i = 0; i < NDIMMS; i++) {
1336 			if (strncmp((char *)bank->dimmsidp[i], sid,
1337 			    DIMM_SERIAL_ID_LEN)  == 0)
1338 				break;
1339 		}
1340 
1341 		if (i == NDIMMS)
1342 			continue;
1343 
1344 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1)
1345 			continue;
1346 		found = 1;
1347 		break;
1348 	}
1349 
1350 	if (found) {
1351 		mutex_exit(&mcdatamutex);
1352 		return (0);
1353 	}
1354 
1355 	/*
1356 	 * If a bank wasn't found, it may be in another segment.
1357 	 * This can happen if the different logical banks of an MC
1358 	 * have different interleave factors.  To deal with this
1359 	 * possibility, we'll do a brute-force search for banks
1360 	 * for this MC with a different seg id then above.
1361 	 */
1362 	bank = (struct bank_info *)bank_head;
1363 	while (bank != NULL) {
1364 
1365 		if (!bank->valid) {
1366 			bank = (struct bank_info *)bank->bank_node.next;
1367 			continue;
1368 		}
1369 
1370 		if (bank->bank_node.id / NBANKS != mcid) {
1371 			bank = (struct bank_info *)bank->bank_node.next;
1372 			continue;
1373 		}
1374 
1375 		/* Ignore banks in the segment we looked in above. */
1376 		if (bank->seg_id == mc_dimm_sids[i].seg_id) {
1377 			bank = (struct bank_info *)bank->bank_node.next;
1378 			continue;
1379 		}
1380 
1381 		for (i = 0; i < NDIMMS; i++) {
1382 			if (strncmp((char *)bank->dimmsidp[i], sid,
1383 			    DIMM_SERIAL_ID_LEN)  == 0)
1384 				break;
1385 		}
1386 
1387 		if (i == NDIMMS) {
1388 			bank = (struct bank_info *)bank->bank_node.next;
1389 			continue;
1390 		}
1391 
1392 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1393 
1394 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1) {
1395 			bank = (struct bank_info *)bank->bank_node.next;
1396 			continue;
1397 		}
1398 
1399 		found = 1;
1400 		break;
1401 	}
1402 
1403 	mutex_exit(&mcdatamutex);
1404 
1405 	if (found)
1406 		return (0);
1407 	else
1408 		return (ENOENT);
1409 }
1410 
1411 static int
1412 mc_get_mem_info(int synd_code, uint64_t paddr,
1413     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1414     int *segsp, int *banksp, int *mcidp)
1415 {
1416 	int upper_pa, lower_pa;
1417 	struct bank_info *bankp;
1418 
1419 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1420 		return (EINVAL);
1421 
1422 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1423 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1424 
1425 	/*
1426 	 * Scan all logical banks to get one responding to the physical
1427 	 * address.
1428 	 */
1429 	mutex_enter(&mcdatamutex);
1430 	bankp = (struct bank_info *)bank_head;
1431 	while (bankp != NULL) {
1432 		struct seg_info *segp;
1433 		int bankid, mcid;
1434 
1435 		bankid = bankp->bank_node.id;
1436 		mcid = bankid / NBANKS;
1437 
1438 		/*
1439 		 * The Address Decoding logic decodes the different fields
1440 		 * in the Memory Address Decoding register to determine
1441 		 * whether a particular logical bank should respond to a
1442 		 * physical address.
1443 		 */
1444 		if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) |
1445 		    bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) {
1446 			bankp = (struct bank_info *)bankp->bank_node.next;
1447 			continue;
1448 		}
1449 
1450 		/*
1451 		 * Get the corresponding segment.
1452 		 */
1453 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1454 		    seg_head)) == NULL) {
1455 			mutex_exit(&mcdatamutex);
1456 			return (EFAULT);
1457 		}
1458 
1459 		*mem_sizep = memsize;
1460 		*seg_sizep = segp->size;
1461 		*bank_sizep = bankp->size;
1462 		*segsp = nsegments;
1463 		*banksp = segp->nbanks;
1464 		*mcidp = mcid;
1465 
1466 		mutex_exit(&mcdatamutex);
1467 
1468 		return (0);
1469 
1470 	}	/* end of while loop for logical bank list */
1471 
1472 	mutex_exit(&mcdatamutex);
1473 	return (ENXIO);
1474 }
1475 
1476 /*
1477  * Construct lists for an enabled MC where size of memory is 0.
1478  * The lists are connected as follows:
1479  * Attached MC -> device group list -> device list(per devgrp).
1480  */
1481 static void
1482 mc_construct(int mc_id, void *dimminfop)
1483 {
1484 	int i, j, idx, dmidx;
1485 	struct mctrl_info *mctrl;
1486 	struct dgrp_info *dgrp;
1487 	struct device_info *dev;
1488 	struct	dimm_info *dimmp = (struct  dimm_info *)dimminfop;
1489 
1490 	mutex_enter(&mcdatamutex);
1491 	/* allocate for mctrl_info and bank_info */
1492 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id,
1493 	    mctrl_head)) != NULL) {
1494 		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
1495 		mutex_exit(&mcdatamutex);
1496 		return;
1497 	}
1498 
1499 	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
1500 
1501 	/*
1502 	 * If dimminfop is NULL, the Memory Controller is disable, and
1503 	 * the number of device group will be zero.
1504 	 */
1505 	if (dimminfop == NULL) {
1506 		mctrl->mctrl_node.id = mc_id;
1507 		mctrl->ndevgrps = 0;
1508 		mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1509 		mutex_exit(&mcdatamutex);
1510 		return;
1511 	}
1512 
1513 	/* add the entry on dgrp_info list */
1514 	for (i = 0; i < NDGRPS; i++) {
1515 		idx = mc_id * NDGRPS + i;
1516 		mctrl->devgrpids[i] = idx;
1517 		if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head))
1518 		    != NULL) {
1519 			cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n",
1520 			    idx);
1521 			continue;
1522 		}
1523 
1524 		dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP);
1525 
1526 		/* add the entry on device_info list */
1527 		for (j = 0; j < NDIMMS; j++) {
1528 			dmidx = idx * NDIMMS + j;
1529 			dgrp->deviceids[j] = dmidx;
1530 			if ((dev = (struct device_info *)
1531 			    mc_node_get(dmidx, device_head)) != NULL) {
1532 				cmn_err(CE_WARN, "mc_construct: device %d "
1533 				    "exists\n", dmidx);
1534 				continue;
1535 			}
1536 			dev = kmem_zalloc(sizeof (struct device_info),
1537 			    KM_SLEEP);
1538 			dev->dev_node.id = dmidx;
1539 			dev->size = 0;
1540 			(void) strncpy(dev->label, (char *)
1541 			    dimmp->label[i * NDIMMS + j], MAX_DEVLEN);
1542 
1543 			mc_node_add((mc_dlist_t *)dev, &device_head,
1544 			    &device_tail);
1545 		}	/* for loop for constructing device_info */
1546 
1547 		dgrp->dgrp_node.id = idx;
1548 		dgrp->ndevices = NDIMMS;
1549 		dgrp->size = 0;
1550 		mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1551 
1552 	}	/* end of for loop for constructing dgrp_info list */
1553 
1554 	mctrl->mctrl_node.id = mc_id;
1555 	mctrl->ndevgrps = NDGRPS;
1556 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1557 	mutex_exit(&mcdatamutex);
1558 }
1559 
1560 /*
1561  * Construct lists for Memory Configuration at logical viewpoint.
1562  *
1563  * Retrieve information from Memory Address Decoding Register and set up
1564  * bank and segment lists. Link bank to its corresponding device group, and
1565  * update size of device group and devices. Also connect bank to the segment.
1566  *
1567  * Memory Address Decoding Register
1568  * -------------------------------------------------------------------------
1569  * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
1570  * |-----------|----------|------|---------|-----|------|-----|-----|-------|
1571  * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
1572  * -------------------------------------------------------------------------
1573  *
1574  */
1575 
1576 static int
1577 mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
1578 {
1579 	int i, dmidx, idx;
1580 	uint32_t ifactor;
1581 	int status = 0;
1582 	uint64_t size, base;
1583 	struct seg_info *seg_curr;
1584 	struct bank_info *bank_curr;
1585 	struct dgrp_info *dgrp;
1586 	struct device_info *dev;
1587 	union {
1588 		struct {
1589 			uint64_t valid	: 1;
1590 			uint64_t resrv1	: 10;
1591 			uint64_t uk	: 12;
1592 			uint64_t resrv2	: 4;
1593 			uint64_t um	: 17;
1594 			uint64_t resrv3	: 2;
1595 			uint64_t lk	: 4;
1596 			uint64_t resrv4	: 2;
1597 			uint64_t lm	: 4;
1598 			uint64_t resrv5	: 8;
1599 		} _s;
1600 		uint64_t madreg;
1601 	} mcreg;
1602 
1603 	mcreg.madreg = reg;
1604 
1605 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
1606 	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));
1607 
1608 	/* add the entry on bank_info list */
1609 	idx = mc_id * NBANKS + bank_no;
1610 
1611 	mutex_enter(&mcdatamutex);
1612 	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
1613 	    != NULL) {
1614 		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
1615 		goto exit;
1616 	}
1617 
1618 	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
1619 	bank_curr->bank_node.id = idx;
1620 	bank_curr->valid = mcreg._s.valid;
1621 	bank_curr->dimminfop = dimminfop;
1622 
1623 	if (!mcreg._s.valid) {
1624 		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1625 		goto exit;
1626 	}
1627 
1628 	/*
1629 	 * size of a logical bank = size of segment / interleave factor
1630 	 * This fomula is not only working for regular configuration,
1631 	 * i.e. number of banks at a segment equals to the max
1632 	 * interleave factor, but also for special case, say 3 bank
1633 	 * interleave. One bank is 2 way interleave and other two are
1634 	 * 4 way. So the sizes of banks are size of segment/2 and /4
1635 	 * respectively.
1636 	 */
1637 	ifactor = (mcreg._s.lk ^ 0xF) + 1;
1638 	size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
1639 	base = mcreg._s.um & ~mcreg._s.uk;
1640 	base <<= MADR_UPA_SHIFT;
1641 
1642 	bank_curr->uk = mcreg._s.uk;
1643 	bank_curr->um = mcreg._s.um;
1644 	bank_curr->lk = mcreg._s.lk;
1645 	bank_curr->lm = mcreg._s.lm;
1646 	bank_curr->size = size;
1647 
1648 	/*
1649 	 * The bank's position depends on which halves of the DIMMs it consists
1650 	 * of. The front-side halves of the 4 DIMMs constitute the front bank
1651 	 * and the back-side halves constitute the back bank. Bank numbers
1652 	 * 0 and 1 are front-side banks and bank numbers 2 and 3 are back side
1653 	 * banks.
1654 	 */
1655 	bank_curr->pos = bank_no >> 1;
1656 	ASSERT((bank_curr->pos == 0) || (bank_curr->pos == 1));
1657 
1658 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
1659 	"lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
1660 	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
1661 
1662 	/* connect the entry and update the size on dgrp_info list */
1663 	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
1664 	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
1665 		/* all avaiable dgrp should be linked at mc_construct */
1666 		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
1667 		kmem_free(bank_curr, sizeof (struct bank_info));
1668 		status = -1;
1669 		goto exit;
1670 	}
1671 
1672 	bank_curr->devgrp_id = idx;
1673 	dgrp->size += size;
1674 
1675 	/* Update the size of entry on device_info list */
1676 	for (i = 0; i < NDIMMS; i++) {
1677 		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
1678 		dgrp->deviceids[i] = dmidx;
1679 
1680 		/* avaiable device should be linked at mc_construct */
1681 		if ((dev = (struct device_info *)mc_node_get(dmidx,
1682 		    device_head)) == NULL) {
1683 			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
1684 			    dmidx);
1685 			kmem_free(bank_curr, sizeof (struct bank_info));
1686 			status = -1;
1687 			goto exit;
1688 		}
1689 
1690 		dev->size += (size / NDIMMS);
1691 
1692 		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
1693 		    dmidx, size));
1694 	}
1695 
1696 	/*
1697 	 * Get the segment by matching the base address, link this bank
1698 	 * to the segment. If not matched, allocate a new segment and
1699 	 * add it at segment list.
1700 	 */
1701 	if (seg_curr = seg_match_base(base)) {
1702 		seg_curr->nbanks++;
1703 		seg_curr->size += size;
1704 		if (ifactor > seg_curr->ifactor)
1705 			seg_curr->ifactor = ifactor;
1706 		bank_curr->seg_id = seg_curr->seg_node.id;
1707 	} else {
1708 		seg_curr = (struct seg_info *)
1709 		kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
1710 		bank_curr->seg_id = seg_id;
1711 		seg_curr->seg_node.id = seg_id++;
1712 		seg_curr->base = base;
1713 		seg_curr->size = size;
1714 		seg_curr->nbanks = 1;
1715 		seg_curr->ifactor = ifactor;
1716 		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);
1717 
1718 		nsegments++;
1719 	}
1720 
1721 	/* Get the local id of bank which is only unique per segment. */
1722 	bank_curr->local_id = seg_curr->nbanks - 1;
1723 
1724 	/* add bank at the end of the list; not sorted by bankid */
1725 	if (seg_curr->hb_inseg != NULL) {
1726 		bank_curr->p_inseg = seg_curr->tb_inseg;
1727 		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
1728 		seg_curr->tb_inseg->n_inseg = bank_curr;
1729 		seg_curr->tb_inseg = bank_curr;
1730 	} else {
1731 		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
1732 		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
1733 	}
1734 	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
1735 	    seg_curr->seg_node.id));
1736 
1737 	if (mc_dimm_sids) {
1738 		rw_enter(&mcdimmsids_rw, RW_WRITER);
1739 		mc_update_bank(bank_curr);
1740 		rw_exit(&mcdimmsids_rw);
1741 	}
1742 	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1743 
1744 	memsize += size;
1745 	if (seg_curr->nbanks > maxbanks)
1746 		maxbanks = seg_curr->nbanks;
1747 
1748 exit:
1749 	mutex_exit(&mcdatamutex);
1750 	return (status);
1751 }
1752 
1753 /*
1754  * Delete nodes related to the given MC on mc, device group, device,
1755  * and bank lists. Moreover, delete corresponding segment if its connected
1756  * banks are all removed.
1757  *
1758  * The "delete" argument is 1 if this is called as a result of DDI_DETACH. In
1759  * this case, the DIMM data structures need to be deleted. The argument is
1760  * 0 if this called as a result of DDI_SUSPEND/DDI_RESUME. In this case,
1761  * the DIMM data structures are left alone.
1762  */
1763 static void
1764 mlayout_del(int mc_id, int delete)
1765 {
1766 	int i, j, dgrpid, devid, bankid, ndevgrps;
1767 	struct seg_info *seg;
1768 	struct bank_info *bank_curr;
1769 	struct mctrl_info *mctrl;
1770 	mc_dlist_t *dgrp_ptr;
1771 	mc_dlist_t *dev_ptr;
1772 	uint64_t base;
1773 
1774 	mutex_enter(&mcdatamutex);
1775 
1776 	/* delete mctrl_info */
1777 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
1778 	    NULL) {
1779 		ndevgrps = mctrl->ndevgrps;
1780 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1781 		kmem_free(mctrl, sizeof (struct mctrl_info));
1782 		nmcs--;
1783 
1784 		/*
1785 		 * There is no other list left for disabled MC.
1786 		 */
1787 		if (ndevgrps == 0) {
1788 			mutex_exit(&mcdatamutex);
1789 			return;
1790 		}
1791 	} else
1792 		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");
1793 
1794 	/* Delete device groups and devices of the detached MC */
1795 	for (i = 0; i < NDGRPS; i++) {
1796 		dgrpid = mc_id * NDGRPS + i;
1797 		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
1798 			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
1799 			continue;
1800 		}
1801 
1802 		for (j = 0; j < NDIMMS; j++) {
1803 			devid = dgrpid * NDIMMS + j;
1804 			if (dev_ptr = mc_node_get(devid, device_head)) {
1805 				mc_node_del(dev_ptr, &device_head,
1806 				    &device_tail);
1807 				kmem_free(dev_ptr, sizeof (struct device_info));
1808 			} else {
1809 				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
1810 				    devid);
1811 			}
1812 		}
1813 
1814 		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
1815 		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
1816 	}
1817 
1818 	/* Delete banks and segments if it has no bank */
1819 	for (i = 0; i < NBANKS; i++) {
1820 		bankid = mc_id * NBANKS + i;
1821 		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
1822 		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
1823 		    bank_head))) {
1824 			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
1825 			continue;
1826 		}
1827 
1828 		if (bank_curr->valid) {
1829 			base = bank_curr->um & ~bank_curr->uk;
1830 			base <<= MADR_UPA_SHIFT;
1831 			bank_curr->valid = 0;
1832 			memsize -= bank_curr->size;
1833 
1834 			/* Delete bank at segment and segment if no bank left */
1835 			if (!(seg = seg_match_base(base))) {
1836 				cmn_err(CE_WARN, "mlayout_del: no seg\n");
1837 				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
1838 				    &bank_tail);
1839 				kmem_free(bank_curr, sizeof (struct bank_info));
1840 				continue;
1841 			}
1842 
1843 			/* update the bank list at the segment */
1844 			if (bank_curr->n_inseg == NULL) {
1845 				/* node is at the tail of list */
1846 				seg->tb_inseg = bank_curr->p_inseg;
1847 			} else {
1848 				bank_curr->n_inseg->p_inseg =
1849 				    bank_curr->p_inseg;
1850 			}
1851 
1852 			if (bank_curr->p_inseg == NULL) {
1853 				/* node is at the head of list */
1854 				seg->hb_inseg = bank_curr->n_inseg;
1855 			} else {
1856 				bank_curr->p_inseg->n_inseg =
1857 				    bank_curr->n_inseg;
1858 			}
1859 
1860 			seg->nbanks--;
1861 			seg->size -= bank_curr->size;
1862 
1863 			if (seg->nbanks == 0) {
1864 				mc_node_del((mc_dlist_t *)seg, &seg_head,
1865 				    &seg_tail);
1866 				kmem_free(seg, sizeof (struct seg_info));
1867 				nsegments--;
1868 			}
1869 
1870 		}
1871 		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
1872 		kmem_free(bank_curr, sizeof (struct bank_info));
1873 	}	/* end of for loop for four banks */
1874 
1875 	if (mc_dimm_sids && delete) {
1876 		rw_enter(&mcdimmsids_rw, RW_WRITER);
1877 		i = mc_get_sid_cache_index(mc_id);
1878 		if (i >= 0) {
1879 			mc_dimm_sids[i].state = MC_DIMM_SIDS_INVALID;
1880 			if (mc_dimm_sids[i].sids) {
1881 				kmem_free(mc_dimm_sids[i].sids,
1882 				    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS));
1883 				mc_dimm_sids[i].sids = NULL;
1884 			}
1885 		}
1886 		rw_exit(&mcdimmsids_rw);
1887 	}
1888 
1889 	mutex_exit(&mcdatamutex);
1890 }
1891 
1892 /*
1893  * Search the segment in the list starting at seg_head by base address
1894  * input: base address
1895  * return: pointer of found segment or null if not found.
1896  */
1897 static struct seg_info *
1898 seg_match_base(u_longlong_t base)
1899 {
1900 	static struct seg_info *seg_ptr;
1901 
1902 	seg_ptr = (struct seg_info *)seg_head;
1903 	while (seg_ptr != NULL) {
1904 		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
1905 		    seg_ptr->base, base));
1906 		if (seg_ptr->base == base)
1907 			break;
1908 		seg_ptr = (struct seg_info *)seg_ptr->seg_node.next;
1909 	}
1910 	return (seg_ptr);
1911 }
1912 
1913 /*
1914  * mc_dlist is a double linking list, including unique id, and pointers to
1915  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1916  * and mctrl_info has it at the top to share the operations, add, del, and get.
1917  *
1918  * The new node is added at the tail and is not sorted.
1919  *
1920  * Input: The pointer of node to be added, head and tail of the list
1921  */
1922 
1923 static void
1924 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1925 {
1926 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1927 		node->id, *head, *tail));
1928 
1929 	if (*head != NULL) {
1930 		node->prev = *tail;
1931 		node->next = (*tail)->next;
1932 		(*tail)->next = node;
1933 		*tail = node;
1934 	} else {
1935 		node->next = node->prev = NULL;
1936 		*head = *tail = node;
1937 	}
1938 }
1939 
1940 /*
1941  * Input: The pointer of node to be deleted, head and tail of the list
1942  *
1943  * Deleted node will be at the following positions
1944  * 1. At the tail of the list
1945  * 2. At the head of the list
1946  * 3. At the head and tail of the list, i.e. only one left.
1947  * 4. At the middle of the list
1948  */
1949 
1950 static void
1951 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1952 {
1953 	if (node->next == NULL) {
1954 		/* deleted node is at the tail of list */
1955 		*tail = node->prev;
1956 	} else {
1957 		node->next->prev = node->prev;
1958 	}
1959 
1960 	if (node->prev == NULL) {
1961 		/* deleted node is at the head of list */
1962 		*head = node->next;
1963 	} else {
1964 		node->prev->next = node->next;
1965 	}
1966 }
1967 
1968 /*
1969  * Search the list from the head of the list to match the given id
1970  * Input: id and the head of the list
1971  * Return: pointer of found node
1972  */
1973 static mc_dlist_t *
1974 mc_node_get(int id, mc_dlist_t *head)
1975 {
1976 	mc_dlist_t *node;
1977 
1978 	node = head;
1979 	while (node != NULL) {
1980 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1981 		    node->id, id));
1982 		if (node->id == id)
1983 			break;
1984 		node = node->next;
1985 	}
1986 	return (node);
1987 }
1988 
1989 /*
1990  * mc-us3 driver allows a platform to add extra label
1991  * information to the unum string. If a platform implements a
1992  * kernel function called plat_add_mem_unum_label() it will be
1993  * executed. This would typically be implemented in the platmod.
1994  */
1995 static void
1996 mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
1997 {
1998 	if (&plat_add_mem_unum_label)
1999 		plat_add_mem_unum_label(buf, mcid, bank, dimm);
2000 }
2001 
2002 static int
2003 mc_get_sid_cache_index(int mcid)
2004 {
2005 	int	i;
2006 
2007 	for (i = 0; i < max_entries; i++) {
2008 		if (mcid == mc_dimm_sids[i].mcid)
2009 			return (i);
2010 	}
2011 
2012 	return (-1);
2013 }
2014 
2015 static void
2016 mc_update_bank(struct bank_info *bank)
2017 {
2018 	int i, j;
2019 	int bankid, mcid, dgrp_no;
2020 
2021 	/*
2022 	 * Mark the MC if DIMM sids are not available.
2023 	 * Mark which segment the DIMMs belong to.  Allocate
2024 	 * space to store DIMM serial ids which are later
2025 	 * provided by the platform layer, and update the bank_info
2026 	 * structure with pointers to its serial ids.
2027 	 */
2028 	bankid = bank->bank_node.id;
2029 	mcid = bankid / NBANKS;
2030 	i = mc_get_sid_cache_index(mcid);
2031 	if (mc_dimm_sids[i].state == MC_DIMM_SIDS_INVALID)
2032 		mc_dimm_sids[i].state = MC_DIMM_SIDS_REQUESTED;
2033 
2034 	mc_dimm_sids[i].seg_id = bank->seg_id;
2035 
2036 	if (mc_dimm_sids[i].sids == NULL) {
2037 		mc_dimm_sids[i].sids = (dimm_sid_t *)kmem_zalloc(
2038 		    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS), KM_SLEEP);
2039 	}
2040 
2041 	dgrp_no = bank->devgrp_id % NDGRPS;
2042 
2043 	for (j = 0; j < NDIMMS; j++) {
2044 		bank->dimmsidp[j] =
2045 		    &mc_dimm_sids[i].sids[j + (NDIMMS * dgrp_no)];
2046 	}
2047 }
2048 
2049 static int
2050 mc_populate_sid_cache(void)
2051 {
2052 	struct bank_info	*bank;
2053 
2054 	if (&plat_populate_sid_cache == 0)
2055 		return (ENOTSUP);
2056 
2057 	ASSERT(RW_WRITE_HELD(&mcdimmsids_rw));
2058 
2059 	bank = (struct bank_info *)bank_head;
2060 	while (bank != NULL) {
2061 		if (!bank->valid) {
2062 			bank = (struct bank_info *)bank->bank_node.next;
2063 			continue;
2064 		}
2065 
2066 		mc_update_bank(bank);
2067 
2068 		bank = (struct bank_info *)bank->bank_node.next;
2069 	}
2070 
2071 
2072 	/*
2073 	 * Call to the platform layer to populate the cache
2074 	 * with DIMM serial ids.
2075 	 */
2076 	return (plat_populate_sid_cache(mc_dimm_sids, max_entries));
2077 }
2078 
2079 static void
2080 mc_init_sid_cache_thr(void)
2081 {
2082 	ASSERT(mc_dimm_sids == NULL);
2083 
2084 	mutex_enter(&mcdatamutex);
2085 	rw_enter(&mcdimmsids_rw, RW_WRITER);
2086 
2087 	mc_dimm_sids = plat_alloc_sid_cache(&max_entries);
2088 	(void) mc_populate_sid_cache();
2089 
2090 	rw_exit(&mcdimmsids_rw);
2091 	mutex_exit(&mcdatamutex);
2092 }
2093 
2094 static int
2095 mc_init_sid_cache(void)
2096 {
2097 	if (&plat_alloc_sid_cache) {
2098 		(void) thread_create(NULL, 0, mc_init_sid_cache_thr, NULL, 0,
2099 		    &p0, TS_RUN, minclsyspri);
2100 		return (0);
2101 	} else
2102 		return (ENOTSUP);
2103 }
2104 
2105 static int
2106 mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp)
2107 {
2108 	int	i;
2109 
2110 	if (buflen < DIMM_SERIAL_ID_LEN)
2111 		return (ENOSPC);
2112 
2113 	/*
2114 	 * If DIMM serial ids have not been cached yet, tell the
2115 	 * caller to try again.
2116 	 */
2117 	if (!rw_tryenter(&mcdimmsids_rw, RW_READER))
2118 		return (EAGAIN);
2119 
2120 	if (mc_dimm_sids == NULL) {
2121 		rw_exit(&mcdimmsids_rw);
2122 		return (EAGAIN);
2123 	}
2124 
2125 	/*
2126 	 * Find dimm serial id using mcid and dimm #
2127 	 */
2128 	for (i = 0; i < max_entries; i++) {
2129 		if (mc_dimm_sids[i].mcid == mcid)
2130 			break;
2131 	}
2132 	if ((i == max_entries) || (!mc_dimm_sids[i].sids)) {
2133 		rw_exit(&mcdimmsids_rw);
2134 		return (ENOENT);
2135 	}
2136 
2137 	(void) strlcpy(buf, mc_dimm_sids[i].sids[dimm],
2138 	    DIMM_SERIAL_ID_LEN);
2139 	*lenp = strlen(buf);
2140 
2141 	rw_exit(&mcdimmsids_rw);
2142 	return (0);
2143 }
2144