xref: /titanic_50/usr/src/uts/sun4u/opl/io/mc-opl.c (revision 3db30c357c20c1eb09687fd0194e0ca62d6358cb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2007
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/conf.h>
34 #include <sys/modctl.h>
35 #include <sys/stat.h>
36 #include <sys/async.h>
37 #include <sys/machcpuvar.h>
38 #include <sys/machsystm.h>
39 #include <sys/promif.h>
40 #include <sys/ksynch.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/sunndi.h>
44 #include <sys/ddifm.h>
45 #include <sys/fm/protocol.h>
46 #include <sys/fm/util.h>
47 #include <sys/kmem.h>
48 #include <sys/fm/io/opl_mc_fm.h>
49 #include <sys/memlist.h>
50 #include <sys/param.h>
51 #include <sys/disp.h>
52 #include <vm/page.h>
53 #include <sys/mc-opl.h>
54 #include <sys/opl.h>
55 #include <sys/opl_dimm.h>
56 #include <sys/scfd/scfostoescf.h>
57 #include <sys/cpu_module.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/vmem.h>
60 #include <vm/hat_sfmmu.h>
61 #include <sys/vmsystm.h>
62 #include <sys/membar.h>
63 
64 /*
65  * Function prototypes
66  */
67 static int mc_open(dev_t *, int, int, cred_t *);
68 static int mc_close(dev_t, int, int, cred_t *);
69 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
70 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
71 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
72 
73 static int mc_poll_init(void);
74 static void mc_poll_fini(void);
75 static int mc_board_add(mc_opl_t *mcp);
76 static int mc_board_del(mc_opl_t *mcp);
77 static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
78 static int mc_resume(mc_opl_t *mcp, uint32_t flag);
79 int opl_mc_suspend(void);
80 int opl_mc_resume(void);
81 
82 static void insert_mcp(mc_opl_t *mcp);
83 static void delete_mcp(mc_opl_t *mcp);
84 
85 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);
86 
87 static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa);
88 
89 int mc_get_mem_unum(int, uint64_t, char *, int, int *);
90 int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr);
91 int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
92 int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp);
93 int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
94     int buflen, int *lenp);
95 mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp);
96 mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp);
97 int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank,
98     uint32_t mf_type, uint32_t d_slot);
99 static void mc_free_dimm_list(mc_dimm_info_t *d);
100 static void mc_get_mlist(mc_opl_t *);
101 static void mc_polling(void);
102 static int mc_opl_get_physical_board(int);
103 
104 static void mc_clear_rewrite(mc_opl_t *mcp, int i);
105 static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state);
106 
107 #ifdef	DEBUG
108 static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *);
109 void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz);
110 void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp);
111 #endif
112 
113 #pragma weak opl_get_physical_board
114 extern int opl_get_physical_board(int);
115 extern int plat_max_boards(void);
116 
117 /*
118  * Configuration data structures
119  */
/*
 * Character device entry points for this driver.  Only open, close and
 * ioctl point at driver routines (mc_open, mc_close, mc_ioctl); the other
 * entry points are nulldev/nodev/nochpoll stubs, and property lookups go
 * through the generic ddi_prop_op.
 */
120 static struct cb_ops mc_cb_ops = {
121 	mc_open,			/* open */
122 	mc_close,			/* close */
123 	nulldev,			/* strategy */
124 	nulldev,			/* print */
125 	nodev,				/* dump */
126 	nulldev,			/* read */
127 	nulldev,			/* write */
128 	mc_ioctl,			/* ioctl */
129 	nodev,				/* devmap */
130 	nodev,				/* mmap */
131 	nodev,				/* segmap */
132 	nochpoll,			/* poll */
133 	ddi_prop_op,			/* cb_prop_op */
134 	0,				/* streamtab */
135 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
136 	CB_REV,				/* rev */
137 	nodev,				/* cb_aread */
138 	nodev				/* cb_awrite */
139 };
140 
/*
 * Device operations vector.  attach/detach are handled by mc_attach() and
 * mc_detach(), getinfo by the generic ddi_getinfo_1to1, and the character
 * entry points come from mc_cb_ops.  There are no bus operations.
 */
141 static struct dev_ops mc_ops = {
142 	DEVO_REV,			/* rev */
143 	0,				/* refcnt  */
144 	ddi_getinfo_1to1,		/* getinfo */
145 	nulldev,			/* identify */
146 	nulldev,			/* probe */
147 	mc_attach,			/* attach */
148 	mc_detach,			/* detach */
149 	nulldev,			/* reset */
150 	&mc_cb_ops,			/* cb_ops */
151 	(struct bus_ops *)0,		/* bus_ops */
152 	nulldev				/* power */
153 };
154 
155 /*
156  * Driver globals
157  */
158 
/*
 * Platform model.  _init() sets it from the "model" property of the PROM
 * root node ("FF1", "FF2" or a name starting with "DC").  The enumerator
 * values are also used as indices into model_names[] below.
 */
159 static enum {
160 	MODEL_FF1 = 0,
161 	MODEL_FF2 = 1,
162 	MODEL_DC = 2
163 } plat_model = MODEL_DC;	/* The default behaviour is DC */
164 
/*
 * Per-model name components used by mc_set_mem_unum() when it formats a
 * unum string: unit_name is the board/unit component and mem_name the
 * memory-board component (empty for the DC model).
 */
165 static struct plat_model_names {
166 	const char *unit_name;
167 	const char *mem_name;
168 } model_names[] = {
169 	{ "MBU_A", "MEMB" },
170 	{ "MBU_B", "MEMB" },
171 	{ "CMU", "" }
172 };
173 
174 /*
175  * The DIMM Names for DC platform.
176  * The index into this table is made up of (bank, dslot),
177  * Where dslot occupies bits 0-1 and bank occupies 2-4.
 *
 * mc_set_mem_unum() computes the index with
 * BD_BK_SLOT_TO_INDEX(0, bank, dslot) and appends the selected name to
 * "MEM" when it builds the unum string.
178  */
179 static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = {
180 	/* --------CMUnn----------- */
181 	/* --CS0-----|--CS1------ */
182 	/* -H-|--L-- | -H- | -L-- */
183 	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
184 	"13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */
185 	"23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */
186 	"33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */
187 	"01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */
188 	"11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */
189 	"21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */
190 	"31A", "30A", "31B", "30B"  /* Bank 7 (MAC 3 bank 1) */
191 };
192 
193 /*
194  * The DIMM Names for FF1/FF2 platforms.
195  * The index into this table is made up of (board, bank, dslot),
196  * Where dslot occupies bits 0-1, bank occupies 2-4 and
197  * board occupies the bit 5.
 *
 * mc_set_mem_unum() computes the index with
 * BD_BK_SLOT_TO_INDEX(board, bank, dslot).  It uses the first character
 * of an entry as the MEMB number and the remaining characters as the
 * DIMM name.
198  */
199 static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = {
200 	/* --------CMU0---------- */
201 	/* --CS0-----|--CS1------ */
202 	/* -H-|--L-- | -H- | -L-- */
203 	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
204 	"01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */
205 	"13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */
206 	"11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */
207 	"23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */
208 	"21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */
209 	"33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */
210 	"31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */
211 	/* --------CMU1---------- */
212 	/* --CS0-----|--CS1------ */
213 	/* -H-|--L-- | -H- | -L-- */
214 	"43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */
215 	"41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */
216 	"53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */
217 	"51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */
218 	"63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */
219 	"61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */
220 	"73A", "72A", "73B", "72B", /* Bank 6 (MAC 3 bank 0) */
221 	"71A", "70A", "71B", "70B"  /* Bank 7 (MAC 3 bank 1) */
222 };
223 
/*
 * Build an index into the DIMM unum tables from a board number, a bank
 * number and a DIMM slot: slot in bits 0-1, bank in bits 2-4, board in
 * bit 5.  Every argument is parenthesized so that a compound expression
 * passed as an argument (e.g. "a | b") is masked as a whole.
 */
#define	BD_BK_SLOT_TO_INDEX(bd, bk, s)			\
	((((bd) & 0x01) << 5) | (((bk) & 0x07) << 2) | ((s) & 0x03))

/* Recover the bank (bits 2-4) or the slot (bits 0-1) from a table index. */
#define	INDEX_TO_BANK(i)			(((i) & 0x1C) >> 2)
#define	INDEX_TO_SLOT(i)			((i) & 0x03)

/* Bit 1 of the DIMM slot number gives the chip-select (0 or 1). */
#define	SLOT_TO_CS(slot)	(((slot) & 0x3) >> 1)
231 
232 /* Isolation unit size is 64 MB */
233 #define	MC_ISOLATION_BSIZE	(64 * 1024 * 1024)
234 
/* Number of entries in mc_scan_speeds[]. */
#define	MC_MAX_SPEEDS 7

/*
 * One scan speed setting.  mc_speeds holds the speed code already shifted
 * by MC_CNTL_SPEED_SHIFT; mc_period is the period paired with that speed
 * (NOTE(review): units are not visible in this part of the file).
 */
typedef struct {
	uint32_t mc_speeds;
	uint32_t mc_period;
} mc_scan_speed_t;

/* Bit position of the speed code within the shifted speed value. */
#define	MC_CNTL_SPEED_SHIFT 26

/*
 * In mirror mode, the bank index is normalized to the "even" bank since
 * the HW treats a mirrored pair as one unit w.r.t. programming.
 * This bank index is the "effective" bank index.
 * All mirrored bank state info on mc_period, mc_speedup_period
 * is stored in the even bank structure to avoid code duplication.
 *
 * The argument is parenthesized so that compound expressions are masked
 * as a whole.
 */
#define	MIRROR_IDX(bankidx)	((bankidx) & ~1)

/* Speed code / period pairs, from speed code 0x6 down to 0x0. */
static mc_scan_speed_t	mc_scan_speeds[MC_MAX_SPEEDS] = {
	{0x6 << MC_CNTL_SPEED_SHIFT, 0},
	{0x5 << MC_CNTL_SPEED_SHIFT, 32},
	{0x4 << MC_CNTL_SPEED_SHIFT, 64},
	{0x3 << MC_CNTL_SPEED_SHIFT, 128},
	{0x2 << MC_CNTL_SPEED_SHIFT, 256},
	{0x1 << MC_CNTL_SPEED_SHIFT, 512},
	{0x0 << MC_CNTL_SPEED_SHIFT, 1024}
};

/* Same value as the first table entry; uses the shared shift constant. */
static uint32_t	mc_max_speed = (0x6 << MC_CNTL_SPEED_SHIFT);
264 
/*
 * Global tunables.  Only some of their consumers are visible in this part
 * of the file; the notes below describe those uses only.
 */
/* Isolation unit size in bytes (MC_ISOLATION_BSIZE, 64 MB). */
265 int mc_isolation_bsize = MC_ISOLATION_BSIZE;
/*
 * Patrol interval in seconds.  mc_attach() may override it from the
 * "mc-timeout-interval-sec" property and derives mc_timeout_period from it.
 */
266 int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC;
267 int mc_max_scf_retry = 16;
268 int mc_max_scf_logs = 64;
269 int mc_max_errlog_processed = BANKNUM_PER_SB*2;
270 int mc_scan_period = 12 * 60 * 60;	/* 12 hours period */
271 int mc_max_rewrite_loop = 100;
272 int mc_rewrite_delay = 10;
273 /*
274  * It takes SCF about 300 ms to process a request.  We can bail out
275  * if it is busy.  It does not pay to wait for it too long.
276  */
277 int mc_max_scf_loop = 2;
278 int mc_scf_delay = 100;
279 int mc_pce_dropped = 0;
/* Priority handed to thread_create() for the polling thread. */
280 int mc_poll_priority = MINCLSYSPRI;
281 int mc_max_rewrite_retry = 6 * 60;
282 
283 
284 /*
285  * Mutex hierarchy in mc-opl
286  * If both mcmutex and mc_lock must be held,
287  * mcmutex must be acquired first, and then mc_lock.
288  */
289 
/* Initialized in _init() and destroyed in _fini(). */
290 static kmutex_t mcmutex;
291 mc_opl_t *mc_instances[OPL_MAX_BOARDS];
292 
/*
 * Polling thread state, all protected by mc_polling_lock:
 *   mc_polling_cv       - the polling thread sleeps on this between passes
 *   mc_poll_exit_cv     - signalled by the polling thread when it exits
 *   mc_poll_cmd         - MC_POLL_EXIT asks the polling thread to exit
 *   mc_pollthr_running  - non-zero while the polling thread is running
 */
293 static kmutex_t mc_polling_lock;
294 static kcondvar_t mc_polling_cv;
295 static kcondvar_t mc_poll_exit_cv;
296 static int mc_poll_cmd = 0;
297 static int mc_pollthr_running = 0;
298 int mc_timeout_period = 0; /* clock ticks; set via drv_usectohz() in mc_attach() */
/* Soft state anchor handed to the ddi_soft_state_*() routines. */
299 void *mc_statep;
300 
301 #ifdef	DEBUG
302 int oplmc_debug = 0;
303 #endif
304 
305 static int mc_debug_show_all = 0;
306 
307 extern struct mod_ops mod_driverops;
308 
/* Loadable driver description; ties the module to mc_ops. */
309 static struct modldrv modldrv = {
310 	&mod_driverops,			/* module type, this one is a driver */
311 	"OPL Memory-controller %I%",	/* module name */
312 	&mc_ops,			/* driver ops */
313 };
314 
/* Linkage passed to mod_install(), mod_remove() and mod_info(). */
315 static struct modlinkage modlinkage = {
316 	MODREV_1,		/* rev */
317 	(void *)&modldrv,
318 	NULL
319 };
320 
321 #pragma weak opl_get_mem_unum
322 #pragma weak opl_get_mem_sid
323 #pragma weak opl_get_mem_offset
324 #pragma weak opl_get_mem_addr
325 
326 extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
327 extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
328 extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
329 extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
330     uint64_t *paddr);
331 
332 
333 /*
334  * pseudo-mc node portid format
335  *
336  *		[10]   = 0
337  *		[9]    = 1
338  *		[8]    = LSB_ID[4] = 0
339  *		[7:4]  = LSB_ID[3:0]
340  *		[3:0]  = 0
341  *
342  */
343 
344 /*
345  * These are the module initialization routines.
346  */
347 int
348 _init(void)
349 {
350 	int	error;
351 	int	plen;
352 	char	model[20];
353 	pnode_t	node;
354 
355 
356 	if ((error = ddi_soft_state_init(&mc_statep,
357 	    sizeof (mc_opl_t), 1)) != 0)
358 		return (error);
359 
360 	if ((error = mc_poll_init()) != 0) {
361 		ddi_soft_state_fini(&mc_statep);
362 		return (error);
363 	}
364 
365 	mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
366 	if (&opl_get_mem_unum)
367 		opl_get_mem_unum = mc_get_mem_unum;
368 	if (&opl_get_mem_sid)
369 		opl_get_mem_sid = mc_get_mem_sid;
370 	if (&opl_get_mem_offset)
371 		opl_get_mem_offset = mc_get_mem_offset;
372 	if (&opl_get_mem_addr)
373 		opl_get_mem_addr = mc_get_mem_addr;
374 
375 	node = prom_rootnode();
376 	plen = prom_getproplen(node, "model");
377 
378 	if (plen > 0 && plen < sizeof (model)) {
379 		(void) prom_getprop(node, "model", model);
380 		model[plen] = '\0';
381 		if (strcmp(model, "FF1") == 0)
382 			plat_model = MODEL_FF1;
383 		else if (strcmp(model, "FF2") == 0)
384 			plat_model = MODEL_FF2;
385 		else if (strncmp(model, "DC", 2) == 0)
386 			plat_model = MODEL_DC;
387 	}
388 
389 	error =  mod_install(&modlinkage);
390 	if (error != 0) {
391 		if (&opl_get_mem_unum)
392 			opl_get_mem_unum = NULL;
393 		if (&opl_get_mem_sid)
394 			opl_get_mem_sid = NULL;
395 		if (&opl_get_mem_offset)
396 			opl_get_mem_offset = NULL;
397 		if (&opl_get_mem_addr)
398 			opl_get_mem_addr = NULL;
399 		mutex_destroy(&mcmutex);
400 		mc_poll_fini();
401 		ddi_soft_state_fini(&mc_statep);
402 	}
403 	return (error);
404 }
405 
406 int
407 _fini(void)
408 {
409 	int error;
410 
411 	if ((error = mod_remove(&modlinkage)) != 0)
412 		return (error);
413 
414 	if (&opl_get_mem_unum)
415 		opl_get_mem_unum = NULL;
416 	if (&opl_get_mem_sid)
417 		opl_get_mem_sid = NULL;
418 	if (&opl_get_mem_offset)
419 		opl_get_mem_offset = NULL;
420 	if (&opl_get_mem_addr)
421 		opl_get_mem_addr = NULL;
422 
423 	mutex_destroy(&mcmutex);
424 	mc_poll_fini();
425 	ddi_soft_state_fini(&mc_statep);
426 
427 	return (0);
428 }
429 
430 int
431 _info(struct modinfo *modinfop)
432 {
433 	return (mod_info(&modlinkage, modinfop));
434 }
435 
436 static void
437 mc_polling_thread()
438 {
439 	mutex_enter(&mc_polling_lock);
440 	mc_pollthr_running = 1;
441 	while (!(mc_poll_cmd & MC_POLL_EXIT)) {
442 		mc_polling();
443 		cv_timedwait(&mc_polling_cv, &mc_polling_lock,
444 		    ddi_get_lbolt() + mc_timeout_period);
445 	}
446 	mc_pollthr_running = 0;
447 
448 	/*
449 	 * signal if any one is waiting for this thread to exit.
450 	 */
451 	cv_signal(&mc_poll_exit_cv);
452 	mutex_exit(&mc_polling_lock);
453 	thread_exit();
454 	/* NOTREACHED */
455 }
456 
457 static int
458 mc_poll_init()
459 {
460 	mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL);
461 	cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL);
462 	cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL);
463 	return (0);
464 }
465 
466 static void
467 mc_poll_fini()
468 {
469 	mutex_enter(&mc_polling_lock);
470 	if (mc_pollthr_running) {
471 		mc_poll_cmd = MC_POLL_EXIT;
472 		cv_signal(&mc_polling_cv);
473 		while (mc_pollthr_running) {
474 			cv_wait(&mc_poll_exit_cv, &mc_polling_lock);
475 		}
476 	}
477 	mutex_exit(&mc_polling_lock);
478 	mutex_destroy(&mc_polling_lock);
479 	cv_destroy(&mc_polling_cv);
480 	cv_destroy(&mc_poll_exit_cv);
481 }
482 
483 static int
484 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
485 {
486 	mc_opl_t *mcp;
487 	int instance;
488 	int rv;
489 
490 	/* get the instance of this devi */
491 	instance = ddi_get_instance(devi);
492 
493 	switch (cmd) {
494 	case DDI_ATTACH:
495 		break;
496 	case DDI_RESUME:
497 		mcp = ddi_get_soft_state(mc_statep, instance);
498 		rv = mc_resume(mcp, MC_DRIVER_SUSPENDED);
499 		return (rv);
500 	default:
501 		return (DDI_FAILURE);
502 	}
503 
504 	if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS)
505 		return (DDI_FAILURE);
506 
507 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
508 		goto bad;
509 	}
510 
511 	if (mc_timeout_period == 0) {
512 		mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi,
513 		    DDI_PROP_DONTPASS, "mc-timeout-interval-sec",
514 		    mc_patrol_interval_sec);
515 		mc_timeout_period = drv_usectohz(1000000 *
516 		    mc_patrol_interval_sec / OPL_MAX_BOARDS);
517 	}
518 
519 	/* set informations in mc state */
520 	mcp->mc_dip = devi;
521 
522 	if (mc_board_add(mcp))
523 		goto bad;
524 
525 	insert_mcp(mcp);
526 
527 	/*
528 	 * Start the polling thread if it is not running already.
529 	 */
530 	mutex_enter(&mc_polling_lock);
531 	if (!mc_pollthr_running) {
532 		(void) thread_create(NULL, 0, (void (*)())mc_polling_thread,
533 		    NULL, 0, &p0, TS_RUN, mc_poll_priority);
534 	}
535 	mutex_exit(&mc_polling_lock);
536 	ddi_report_dev(devi);
537 
538 	return (DDI_SUCCESS);
539 
540 bad:
541 	ddi_soft_state_free(mc_statep, instance);
542 	return (DDI_FAILURE);
543 }
544 
545 /* ARGSUSED */
546 static int
547 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
548 {
549 	int rv;
550 	int instance;
551 	mc_opl_t *mcp;
552 
553 	/* get the instance of this devi */
554 	instance = ddi_get_instance(devi);
555 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
556 		return (DDI_FAILURE);
557 	}
558 
559 	switch (cmd) {
560 	case DDI_SUSPEND:
561 		rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED);
562 		return (rv);
563 	case DDI_DETACH:
564 		break;
565 	default:
566 		return (DDI_FAILURE);
567 	}
568 
569 	delete_mcp(mcp);
570 	if (mc_board_del(mcp) != DDI_SUCCESS) {
571 		return (DDI_FAILURE);
572 	}
573 
574 	/* free up the soft state */
575 	ddi_soft_state_free(mc_statep, instance);
576 
577 	return (DDI_SUCCESS);
578 }
579 
580 /* ARGSUSED */
581 static int
582 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
583 {
584 	return (0);
585 }
586 
587 /* ARGSUSED */
588 static int
589 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
590 {
591 	return (0);
592 }
593 
594 /* ARGSUSED */
595 static int
596 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
597 	int *rvalp)
598 {
599 #ifdef DEBUG
600 	return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp));
601 #else
602 	return (ENXIO);
603 #endif
604 }
605 
606 /*
607  * PA validity check:
608  * This function return 1 if the PA is a valid PA
609  * in the running Solaris instance i.e. in physinstall
610  * Otherwise, return 0.
611  */
612 
613 /* ARGSUSED */
614 static int
615 pa_is_valid(mc_opl_t *mcp, uint64_t addr)
616 {
617 	if (mcp->mlist == NULL)
618 		mc_get_mlist(mcp);
619 
620 	if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) {
621 		return (1);
622 	}
623 	return (0);
624 }
625 
626 /*
627  * mac-pa translation routines.
628  *
629  *    Input: mc driver state, (LSB#, Bank#, DIMM address)
630  *    Output: physical address
631  *
632  *    Valid   - return value:  0
633  *    Invalid - return value: -1
634  */
635 static int
636 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa)
637 {
638 	int i;
639 	uint64_t pa_offset = 0;
640 	int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1;
641 	int bank = maddr->ma_bank;
642 	mc_addr_t maddr1;
643 	int bank0, bank1;
644 
645 	MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
646 	    maddr->ma_dimm_addr);
647 
648 	/* loc validity check */
649 	ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd);
650 	ASSERT(bank >= 0 && OPL_BANK_MAX > bank);
651 
652 	/* Do translation */
653 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
654 		int pa_bit = 0;
655 		int mc_bit = mcp->mc_trans_table[cs][i];
656 		if (mc_bit < MC_ADDRESS_BITS) {
657 			pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1;
658 		} else if (mc_bit == MP_NONE) {
659 			pa_bit = 0;
660 		} else if (mc_bit == MP_BANK_0) {
661 			pa_bit = bank & 1;
662 		} else if (mc_bit == MP_BANK_1) {
663 			pa_bit = (bank >> 1) & 1;
664 		} else if (mc_bit == MP_BANK_2) {
665 			pa_bit = (bank >> 2) & 1;
666 		}
667 		pa_offset |= ((uint64_t)pa_bit) << i;
668 	}
669 	*pa = mcp->mc_start_address + pa_offset;
670 	MC_LOG("pa = %lx\n", *pa);
671 
672 	if (pa_to_maddr(mcp, *pa, &maddr1) == -1) {
673 		cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to "
674 		    "convert PA %lx\n", maddr->ma_bd, bank,
675 		    maddr->ma_dimm_addr, *pa);
676 		return (-1);
677 	}
678 
679 	/*
680 	 * In mirror mode, PA is always translated to the even bank.
681 	 */
682 	if (IS_MIRROR(mcp, maddr->ma_bank)) {
683 		bank0 = maddr->ma_bank & ~(1);
684 		bank1 = maddr1.ma_bank & ~(1);
685 	} else {
686 		bank0 = maddr->ma_bank;
687 		bank1 = maddr1.ma_bank;
688 	}
689 	/*
690 	 * there is no need to check ma_bd because it is generated from
691 	 * mcp.  They are the same.
692 	 */
693 	if ((bank0 == bank1) && (maddr->ma_dimm_addr ==
694 	    maddr1.ma_dimm_addr)) {
695 		return (0);
696 	} else {
697 		cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, "
698 		    "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
699 		    maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank,
700 		    maddr1.ma_dimm_addr);
701 		return (-1);
702 	}
703 }
704 
705 /*
706  * PA to CS (used by pa_to_maddr).
707  */
708 static int
709 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset)
710 {
711 	int i;
712 	int cs = 1;
713 
714 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
715 		/* MAC address bit<29> is arranged on the same PA bit */
716 		/* on both table. So we may use any table. */
717 		if (mcp->mc_trans_table[0][i] == CS_SHIFT) {
718 			cs = (pa_offset >> i) & 1;
719 			break;
720 		}
721 	}
722 	return (cs);
723 }
724 
725 /*
726  * PA to DIMM (used by pa_to_maddr).
727  */
728 /* ARGSUSED */
729 static uint32_t
730 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset)
731 {
732 	int i;
733 	int cs = pa_to_cs(mcp, pa_offset);
734 	uint32_t dimm_addr = 0;
735 
736 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
737 		int pa_bit_value = (pa_offset >> i) & 1;
738 		int mc_bit = mcp->mc_trans_table[cs][i];
739 		if (mc_bit < MC_ADDRESS_BITS) {
740 			dimm_addr |= pa_bit_value << mc_bit;
741 		}
742 	}
743 	dimm_addr |= cs << CS_SHIFT;
744 	return (dimm_addr);
745 }
746 
747 /*
748  * PA to Bank (used by pa_to_maddr).
749  */
750 static int
751 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset)
752 {
753 	int i;
754 	int cs = pa_to_cs(mcp, pa_offset);
755 	int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT];
756 
757 
758 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
759 		int pa_bit_value = (pa_offset >> i) & 1;
760 		int mc_bit = mcp->mc_trans_table[cs][i];
761 		switch (mc_bit) {
762 		case MP_BANK_0:
763 			bankno |= pa_bit_value;
764 			break;
765 		case MP_BANK_1:
766 			bankno |= pa_bit_value << 1;
767 			break;
768 		case MP_BANK_2:
769 			bankno |= pa_bit_value << 2;
770 			break;
771 		}
772 	}
773 
774 	return (bankno);
775 }
776 
777 /*
778  * PA to MAC address translation
779  *
780  *   Input: MAC driver state, physicall adress
781  *   Output: LSB#, Bank id, mac address
782  *
783  *    Valid   - return value:  0
784  *    Invalid - return value: -1
785  */
786 
787 int
788 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr)
789 {
790 	uint64_t pa_offset;
791 
792 	if (!mc_rangecheck_pa(mcp, pa))
793 		return (-1);
794 
795 	/* Do translation */
796 	pa_offset = pa - mcp->mc_start_address;
797 
798 	maddr->ma_bd = mcp->mc_board_num;
799 	maddr->ma_phys_bd = mcp->mc_phys_board_num;
800 	maddr->ma_bank = pa_to_bank(mcp, pa_offset);
801 	maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset);
802 	MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd,
803 	    maddr->ma_bank, maddr->ma_dimm_addr);
804 	return (0);
805 }
806 
807 /*
808  * UNUM format for DC is "/CMUnn/MEMxyZ", where
809  *	nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3.
810  *	x = MAC 0..3
811  *	y = 0..3 (slot info).
812  *	Z = 'A' or 'B'
813  *
814  * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where
815  *	x = 0..3 (MEMB number)
816  *	y = 0..3 (slot info).
817  *	Z = 'A' or 'B'
818  *
819  * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ"
820  *	x = 0..7 (MEMB number)
821  *	y = 0..3 (slot info).
822  *	Z = 'A' or 'B'
823  */
824 int
825 mc_set_mem_unum(char *buf, int buflen, int sb, int bank,
826     uint32_t mf_type, uint32_t d_slot)
827 {
828 	char *dimmnm;
829 	char memb_num;
830 	int cs;
831 	int i;
832 	int j;
833 
834 	cs = SLOT_TO_CS(d_slot);
835 
836 	if (plat_model == MODEL_DC) {
837 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
838 		    mf_type == FLT_TYPE_PERMANENT_CE) {
839 			i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
840 			dimmnm = mc_dc_dimm_unum_table[i];
841 			snprintf(buf, buflen, "/%s%02d/MEM%s",
842 			    model_names[plat_model].unit_name, sb, dimmnm);
843 		} else {
844 			i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
845 			j = (cs == 0) ?  i : i + 2;
846 			snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
847 			    model_names[plat_model].unit_name, sb,
848 			    mc_dc_dimm_unum_table[j],
849 			    mc_dc_dimm_unum_table[j + 1]);
850 		}
851 	} else {
852 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
853 		    mf_type == FLT_TYPE_PERMANENT_CE) {
854 			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
855 			dimmnm = mc_ff_dimm_unum_table[i];
856 			memb_num = dimmnm[0];
857 			snprintf(buf, buflen, "/%s/%s%c/MEM%s",
858 			    model_names[plat_model].unit_name,
859 			    model_names[plat_model].mem_name,
860 			    memb_num, &dimmnm[1]);
861 		} else {
862 			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
863 			j = (cs == 0) ?  i : i + 2;
864 			memb_num = mc_ff_dimm_unum_table[i][0],
865 			    snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
866 			    model_names[plat_model].unit_name,
867 			    model_names[plat_model].mem_name, memb_num,
868 			    &mc_ff_dimm_unum_table[j][1],
869 			    &mc_ff_dimm_unum_table[j + 1][1]);
870 		}
871 	}
872 	return (0);
873 }
874 
875 static void
876 mc_ereport_post(mc_aflt_t *mc_aflt)
877 {
878 	char buf[FM_MAX_CLASS];
879 	char device_path[MAXPATHLEN];
880 	char sid[MAXPATHLEN];
881 	nv_alloc_t *nva = NULL;
882 	nvlist_t *ereport, *detector, *resource;
883 	errorq_elem_t *eqep;
884 	int nflts;
885 	mc_flt_stat_t *flt_stat;
886 	int i, n;
887 	int blen = MAXPATHLEN;
888 	char *p, *s = NULL;
889 	uint32_t values[2], synd[2], dslot[2];
890 	uint64_t offset = (uint64_t)-1;
891 	int ret = -1;
892 
893 	if (panicstr) {
894 		eqep = errorq_reserve(ereport_errorq);
895 		if (eqep == NULL)
896 			return;
897 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
898 		nva = errorq_elem_nva(ereport_errorq, eqep);
899 	} else {
900 		ereport = fm_nvlist_create(nva);
901 	}
902 
903 	/*
904 	 * Create the scheme "dev" FMRI.
905 	 */
906 	detector = fm_nvlist_create(nva);
907 	resource = fm_nvlist_create(nva);
908 
909 	nflts = mc_aflt->mflt_nflts;
910 
911 	ASSERT(nflts >= 1 && nflts <= 2);
912 
913 	flt_stat = mc_aflt->mflt_stat[0];
914 	(void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path);
915 	(void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL,
916 	    device_path, NULL);
917 
918 	/*
919 	 * Encode all the common data into the ereport.
920 	 */
921 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS,
922 	    mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS,
923 	    mc_aflt->mflt_erpt_class);
924 
925 	MC_LOG("mc_ereport_post: ereport %s\n", buf);
926 
927 
928 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
929 	    fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL);
930 
931 	/*
932 	 * Set payload.
933 	 */
934 	fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32,
935 	    flt_stat->mf_flt_maddr.ma_bd, NULL);
936 
937 	fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64,
938 	    flt_stat->mf_flt_paddr, NULL);
939 
940 	if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE ||
941 	    flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
942 		fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8,
943 		    ECC_STICKY, NULL);
944 	}
945 
946 	for (i = 0; i < nflts; i++)
947 		values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank;
948 
949 	fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts,
950 	    values, NULL);
951 
952 	for (i = 0; i < nflts; i++)
953 		values[i] = mc_aflt->mflt_stat[i]->mf_cntl;
954 
955 	fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts,
956 	    values, NULL);
957 
958 	for (i = 0; i < nflts; i++)
959 		values[i] = mc_aflt->mflt_stat[i]->mf_err_add;
960 
961 	/* offset is set only for PCE and ICE */
962 	if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE ||
963 	    mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) {
964 		offset = values[0];
965 
966 	}
967 	fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts,
968 	    values, NULL);
969 
970 	for (i = 0; i < nflts; i++)
971 		values[i] = mc_aflt->mflt_stat[i]->mf_err_log;
972 
973 	fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts,
974 	    values, NULL);
975 
976 	for (i = 0; i < nflts; i++) {
977 		flt_stat = mc_aflt->mflt_stat[i];
978 		if (flt_stat->mf_errlog_valid) {
979 			synd[i] = flt_stat->mf_synd;
980 			dslot[i] = flt_stat->mf_dimm_slot;
981 			values[i] = flt_stat->mf_dram_place;
982 		} else {
983 			synd[i] = 0;
984 			dslot[i] = 0;
985 			values[i] = 0;
986 		}
987 	}
988 
989 	fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts,
990 	    synd, NULL);
991 
992 	fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY,
993 	    nflts, dslot, NULL);
994 
995 	fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts,
996 	    values, NULL);
997 
998 	device_path[0] = 0;
999 	p = &device_path[0];
1000 	sid[0] = 0;
1001 	s = &sid[0];
1002 	ret = 0;
1003 
1004 	for (i = 0; i < nflts; i++) {
1005 		int bank;
1006 
1007 		flt_stat = mc_aflt->mflt_stat[i];
1008 		bank = flt_stat->mf_flt_maddr.ma_bank;
1009 		ret = mc_set_mem_unum(p + strlen(p), blen,
1010 		    flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type,
1011 		    flt_stat->mf_dimm_slot);
1012 
1013 		if (ret != 0) {
1014 			cmn_err(CE_WARN,
1015 			    "mc_ereport_post: Failed to determine the unum "
1016 			    "for board=%d bank=%d type=0x%x slot=0x%x",
1017 			    flt_stat->mf_flt_maddr.ma_bd, bank,
1018 			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1019 			continue;
1020 		}
1021 		n = strlen(device_path);
1022 		blen = MAXPATHLEN - n;
1023 		p = &device_path[n];
1024 		if (i < (nflts - 1)) {
1025 			snprintf(p, blen, " ");
1026 			blen--;
1027 			p++;
1028 		}
1029 
1030 		if (ret == 0) {
1031 			ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s),
1032 			    blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank,
1033 			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1034 
1035 		}
1036 	}
1037 
1038 	(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1039 	    device_path, (ret == 0) ? sid : NULL, (ret == 0) ? offset :
1040 	    (uint64_t)-1);
1041 
1042 	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource,
1043 	    NULL);
1044 
1045 	if (panicstr) {
1046 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1047 	} else {
1048 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1049 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1050 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1051 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1052 	}
1053 }
1054 
1055 
1056 static void
1057 mc_err_drain(mc_aflt_t *mc_aflt)
1058 {
1059 	int rv;
1060 	uint64_t pa = (uint64_t)(-1);
1061 	int i;
1062 
1063 	MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class);
1064 	/*
1065 	 * we come here only when we have:
1066 	 * In mirror mode: MUE, SUE
1067 	 * In normal mode: UE, Permanent CE, Intermittent CE
1068 	 */
1069 	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1070 		rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
1071 		    &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa);
1072 
1073 		/* Ensure the pa is valid (not in isolated memory block) */
1074 		if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa))
1075 			mc_aflt->mflt_stat[i]->mf_flt_paddr = pa;
1076 		else
1077 			mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1;
1078 	}
1079 
1080 	MC_LOG("mc_err_drain:pa = %lx\n", pa);
1081 
1082 	switch (page_retire_check(pa, NULL)) {
1083 	case 0:
1084 	case EAGAIN:
1085 		MC_LOG("Page retired or pending\n");
1086 		return;
1087 	case EIO:
1088 		/*
1089 		 * Do page retirement except for the PCE and ICE cases.
1090 		 * This is taken care by the OPL DE
1091 		 */
1092 		if (mc_aflt->mflt_stat[0]->mf_type !=
1093 		    FLT_TYPE_INTERMITTENT_CE &&
1094 		    mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) {
1095 			MC_LOG("offline page at pa %lx error %x\n", pa,
1096 			    mc_aflt->mflt_pr);
1097 			(void) page_retire(pa, mc_aflt->mflt_pr);
1098 		}
1099 		break;
1100 	case EINVAL:
1101 	default:
1102 		/*
1103 		 * Some memory do not have page structure so
1104 		 * we keep going in case of EINVAL.
1105 		 */
1106 		break;
1107 	}
1108 
1109 	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1110 		mc_aflt_t mc_aflt0;
1111 		if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) {
1112 			mc_aflt0 = *mc_aflt;
1113 			mc_aflt0.mflt_nflts = 1;
1114 			mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i];
1115 			mc_ereport_post(&mc_aflt0);
1116 		}
1117 	}
1118 }
1119 
1120 /*
1121  * The restart address is actually defined in unit of PA[37:6]
1122  * the mac patrol will convert that to dimm offset.  If the
1123  * address is not in the bank, it will continue to search for
1124  * the next PA that is within the bank.
1125  *
1126  * Also the mac patrol scans the dimms based on PA, not
1127  * dimm offset.
1128  */
1129 static int
1130 restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info)
1131 {
1132 	uint64_t pa;
1133 	int rv;
1134 
1135 	if (MC_REWRITE_MODE(mcp, bank)) {
1136 		return (0);
1137 	}
1138 	if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) {
1139 		MAC_PTRL_START(mcp, bank);
1140 		return (0);
1141 	}
1142 
1143 	rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa);
1144 	if (rv != 0) {
1145 		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
1146 		MAC_PTRL_START(mcp, bank);
1147 		return (0);
1148 	}
1149 
1150 	if (!mc_rangecheck_pa(mcp, pa)) {
1151 		/* pa is not on this board, just retry */
1152 		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
1153 		    "on board %d\n", pa, mcp->mc_board_num);
1154 		MAC_PTRL_START(mcp, bank);
1155 		return (0);
1156 	}
1157 
1158 	MC_LOG("restart_patrol: pa = %lx\n", pa);
1159 
1160 	if (!rsaddr_info->mi_injectrestart) {
1161 		/*
1162 		 * For non-error injection restart we need to
1163 		 * determine if the current restart pa/page is
1164 		 * a "good" page. A "good" page is a page that
1165 		 * has not been page retired. If the current
1166 		 * page that contains the pa is "good", we will
1167 		 * do a HW auto restart and let HW patrol continue
1168 		 * where it last stopped. Most desired scenario.
1169 		 *
1170 		 * If the current page is not "good", we will advance
1171 		 * to the next page to find the next "good" page and
1172 		 * restart the patrol from there.
1173 		 */
1174 		int wrapcount = 0;
1175 		uint64_t origpa = pa;
1176 		while (wrapcount < 2) {
1177 			if (!pa_is_valid(mcp, pa)) {
1178 			/*
1179 			 * Not in physinstall - advance to the
1180 			 * next memory isolation blocksize
1181 			 */
1182 			MC_LOG("Invalid PA\n");
1183 			pa = roundup(pa + 1, mc_isolation_bsize);
1184 			} else {
1185 			int rv;
1186 			if ((rv = page_retire_check(pa, NULL)) != 0 &&
1187 			    rv != EAGAIN) {
1188 					/*
1189 					 * The page is "good" (not retired),
1190 					 * we will use automatic HW restart
1191 					 * algorithm if this is the original
1192 					 * current starting page.
1193 					 */
1194 				if (pa == origpa) {
1195 					MC_LOG("Page has no error. "
1196 					    "Auto restart\n");
1197 					MAC_PTRL_START(mcp, bank);
1198 					return (0);
1199 				} else {
1200 					/*
1201 					 * found a subsequent good page
1202 					 */
1203 					break;
1204 				}
1205 			}
1206 
1207 			/*
1208 			 * Skip to the next page
1209 			 */
1210 			pa = roundup(pa + 1, PAGESIZE);
1211 			MC_LOG("Skipping bad page to %lx\n", pa);
1212 			}
1213 
1214 		    /* Check to see if we hit the end of the memory range */
1215 			if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
1216 			MC_LOG("Wrap around\n");
1217 			pa = mcp->mc_start_address;
1218 			wrapcount++;
1219 			}
1220 		}
1221 
1222 		if (wrapcount > 1) {
1223 			MC_LOG("Failed to find a good page. Just restart\n");
1224 			MAC_PTRL_START(mcp, bank);
1225 			return (0);
1226 		}
1227 	}
1228 
1229 	/*
1230 	 * We reached here either:
1231 	 * 1. We are doing an error injection restart that specify
1232 	 *    the exact pa/page to restart. OR
1233 	 * 2. We found a subsequent good page different from the
1234 	 *    original restart pa/page.
1235 	 * Restart MAC patrol: PA[37:6]
1236 	 */
1237 	MC_LOG("restart at pa = %lx\n", pa);
1238 	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
1239 	MAC_PTRL_START_ADD(mcp, bank);
1240 
1241 	return (0);
1242 }
1243 
1244 static void
1245 mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p)
1246 {
1247 	ASSERT(p != NULL);
1248 	p->ri_next = *q;
1249 	*q = p;
1250 }
1251 
1252 static mc_retry_info_t *
1253 mc_retry_info_get(mc_retry_info_t **q)
1254 {
1255 	mc_retry_info_t *p;
1256 
1257 	if ((p = *q) != NULL) {
1258 		*q = p->ri_next;
1259 		return (p);
1260 	} else {
1261 		return (NULL);
1262 	}
1263 }
1264 
1265 /*
1266  * Rewriting is used for two purposes.
1267  *  - to correct the error in memory.
1268  *  - to determine whether the error is permanent or intermittent.
1269  * It's done by writing the address in MAC_BANKm_REWRITE_ADD
1270  * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that,
1271  * REW_END (and REW_CE/REW_UE if some error detected) is set when
1272  * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM.
1273  *
1274  * Note that rewrite operation doesn't change RAW_UE to Marked UE.
1275  * Therefore, we use it only CE case.
1276  */
1277 
1278 static uint32_t
1279 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying)
1280 {
1281 	uint32_t cntl;
1282 	int count = 0;
1283 	int max_count;
1284 	int retry_state;
1285 
1286 	if (retrying)
1287 		max_count = 1;
1288 	else
1289 		max_count = mc_max_rewrite_loop;
1290 
1291 	retry_state = RETRY_STATE_PENDING;
1292 
1293 	if (!retrying && MC_REWRITE_MODE(mcp, bank)) {
1294 		goto timeout;
1295 	}
1296 
1297 	retry_state = RETRY_STATE_ACTIVE;
1298 
1299 	/* first wait to make sure PTRL_STATUS is 0 */
1300 	while (count++ < max_count) {
1301 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1302 		if (!(cntl & MAC_CNTL_PTRL_STATUS)) {
1303 			count = 0;
1304 			break;
1305 		}
1306 		drv_usecwait(mc_rewrite_delay);
1307 	}
1308 	if (count >= max_count)
1309 		goto timeout;
1310 
1311 	count = 0;
1312 
1313 	ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr);
1314 	MAC_REW_REQ(mcp, bank);
1315 
1316 	retry_state = RETRY_STATE_REWRITE;
1317 
1318 	do {
1319 		if (count++ > max_count) {
1320 			goto timeout;
1321 		} else {
1322 			drv_usecwait(mc_rewrite_delay);
1323 		}
1324 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1325 	/*
1326 	 * If there are other MEMORY or PCI activities, this
1327 	 * will be BUSY, else it should be set immediately
1328 	 */
1329 	} while (!(cntl & MAC_CNTL_REW_END));
1330 
1331 	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
1332 	return (cntl);
1333 timeout:
1334 	mc_set_rewrite(mcp, bank, dimm_addr, retry_state);
1335 
1336 	return (0);
1337 }
1338 
1339 void
1340 mc_clear_rewrite(mc_opl_t *mcp, int bank)
1341 {
1342 	struct mc_bank *bankp;
1343 	mc_retry_info_t *retry;
1344 	uint32_t rew_addr;
1345 
1346 	bankp = &(mcp->mc_bank[bank]);
1347 	retry = bankp->mcb_active;
1348 	bankp->mcb_active = NULL;
1349 	mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1350 
1351 again:
1352 	bankp->mcb_rewrite_count = 0;
1353 
1354 	while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) {
1355 		rew_addr = retry->ri_addr;
1356 		mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1357 		if (do_rewrite(mcp, bank, rew_addr, 1) == 0)
1358 			break;
1359 	}
1360 
1361 	/* we break out if no more pending rewrite or we got timeout again */
1362 
1363 	if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1364 		if (!IS_MIRROR(mcp, bank)) {
1365 			MC_CLEAR_REWRITE_MODE(mcp, bank);
1366 		} else {
1367 			int mbank = bank ^ 1;
1368 			bankp = &(mcp->mc_bank[mbank]);
1369 			if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1370 			MC_CLEAR_REWRITE_MODE(mcp, bank);
1371 			MC_CLEAR_REWRITE_MODE(mcp, mbank);
1372 			} else {
1373 			bank = mbank;
1374 			goto again;
1375 			}
1376 		}
1377 	}
1378 }
1379 
1380 void
1381 mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state)
1382 {
1383 	mc_retry_info_t *retry;
1384 	struct mc_bank *bankp;
1385 
1386 	bankp = &mcp->mc_bank[bank];
1387 
1388 	retry = mc_retry_info_get(&bankp->mcb_retry_freelist);
1389 
1390 	ASSERT(retry != NULL);
1391 
1392 	retry->ri_addr = addr;
1393 	retry->ri_state = state;
1394 
1395 	MC_SET_REWRITE_MODE(mcp, bank);
1396 
1397 	if ((state > RETRY_STATE_PENDING)) {
1398 		ASSERT(bankp->mcb_active == NULL);
1399 		bankp->mcb_active = retry;
1400 	} else {
1401 		mc_retry_info_put(&bankp->mcb_retry_pending, retry);
1402 	}
1403 
1404 	if (IS_MIRROR(mcp, bank)) {
1405 		int mbank = bank ^1;
1406 		MC_SET_REWRITE_MODE(mcp, mbank);
1407 	}
1408 }
1409 
1410 void
1411 mc_process_scf_log(mc_opl_t *mcp)
1412 {
1413 	int count;
1414 	int n = 0;
1415 	scf_log_t *p;
1416 	int bank;
1417 
1418 	for (bank = 0; bank < BANKNUM_PER_SB; bank++) {
1419 		while ((p = mcp->mc_scf_log[bank]) != NULL &&
1420 		    (n < mc_max_errlog_processed)) {
1421 		ASSERT(bank == p->sl_bank);
1422 		count = 0;
1423 		while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank))
1424 		    & MAC_STATIC_ERR_VLD)) {
1425 			if (count++ >= (mc_max_scf_loop)) {
1426 				break;
1427 			}
1428 			drv_usecwait(mc_scf_delay);
1429 		}
1430 
1431 		if (count < mc_max_scf_loop) {
1432 			ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank),
1433 			    p->sl_err_log);
1434 
1435 			ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank),
1436 			    p->sl_err_add|MAC_STATIC_ERR_VLD);
1437 			mcp->mc_scf_retry[bank] = 0;
1438 		} else {
1439 			/*
1440 			 * if we try too many times, just drop the req
1441 			 */
1442 			if (mcp->mc_scf_retry[bank]++ <=
1443 			    mc_max_scf_retry) {
1444 				return;
1445 			} else {
1446 				if ((++mc_pce_dropped & 0xff) == 0) {
1447 					cmn_err(CE_WARN, "Cannot "
1448 					    "report Permanent CE to "
1449 					    "SCF\n");
1450 				}
1451 			}
1452 		}
1453 		n++;
1454 		mcp->mc_scf_log[bank] = p->sl_next;
1455 		mcp->mc_scf_total[bank]--;
1456 		ASSERT(mcp->mc_scf_total[bank] >= 0);
1457 		kmem_free(p, sizeof (scf_log_t));
1458 		}
1459 	}
1460 }
1461 void
1462 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank)
1463 {
1464 	scf_log_t *p;
1465 
1466 	if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) {
1467 		if ((++mc_pce_dropped & 0xff) == 0) {
1468 			cmn_err(CE_WARN, "Too many Permanent CE requests.\n");
1469 		}
1470 		return;
1471 	}
1472 	p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP);
1473 	p->sl_next = 0;
1474 	p->sl_err_add = flt_stat->mf_err_add;
1475 	p->sl_err_log = flt_stat->mf_err_log;
1476 	p->sl_bank = bank;
1477 
1478 	if (mcp->mc_scf_log[bank] == NULL) {
1479 		/*
1480 		 * we rely on mc_scf_log to detect NULL queue.
1481 		 * mc_scf_log_tail is irrelevant is such case.
1482 		 */
1483 		mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p;
1484 	} else {
1485 		mcp->mc_scf_log_tail[bank]->sl_next = p;
1486 		mcp->mc_scf_log_tail[bank] = p;
1487 	}
1488 	mcp->mc_scf_total[bank]++;
1489 }
1490 /*
1491  * This routine determines what kind of CE happens, intermittent
1492  * or permanent as follows. (See 4.7.3 in Columbus2 PRM.)
1493  * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register.
1494  * - If CE is still detected on the same address even after doing
1495  *   rewrite operation twice, it is determined as permanent error.
1496  * - If error is not detected anymore, it is determined as intermittent
1497  *   error.
1498  * - If UE is detected due to rewrite operation, it should be treated
1499  *   as UE.
1500  */
1501 
1502 /* ARGSUSED */
1503 static void
1504 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error)
1505 {
1506 	uint32_t cntl;
1507 	int i;
1508 
1509 	flt_stat->mf_type = FLT_TYPE_PERMANENT_CE;
1510 	/*
1511 	 * rewrite request 1st time reads and correct error data
1512 	 * and write to DIMM.  2nd rewrite request must be issued
1513 	 * after REW_CE/UE/END is 0.  When the 2nd request is completed,
1514 	 * if REW_CE = 1, then it is permanent CE.
1515 	 */
1516 	for (i = 0; i < 2; i++) {
1517 		cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0);
1518 
1519 		if (cntl == 0) {
1520 			/* timeout case */
1521 			return;
1522 		}
1523 		/*
1524 		 * If the error becomes UE or CMPE
1525 		 * we return to the caller immediately.
1526 		 */
1527 		if (cntl & MAC_CNTL_REW_UE) {
1528 			if (ptrl_error)
1529 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE;
1530 			else
1531 				flt_stat->mf_cntl |= MAC_CNTL_MI_UE;
1532 			flt_stat->mf_type = FLT_TYPE_UE;
1533 			return;
1534 		}
1535 		if (cntl & MAC_CNTL_REW_CMPE) {
1536 			if (ptrl_error)
1537 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE;
1538 			else
1539 				flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE;
1540 			flt_stat->mf_type = FLT_TYPE_CMPE;
1541 			return;
1542 		}
1543 	}
1544 	if (!(cntl & MAC_CNTL_REW_CE)) {
1545 		flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE;
1546 	}
1547 
1548 	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1549 		/* report PERMANENT_CE to SP via SCF */
1550 		if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) {
1551 			mc_queue_scf_log(mcp, flt_stat, bank);
1552 		}
1553 	}
1554 }
1555 
/*
 * Tests on a MAC_PTRL_CNTL value.  'f' selects which error bits are
 * examined: the patrol (PTRL) bits when non-zero, the MI bits when zero.
 * IS_CMPE/IS_UE/IS_CE evaluate to the masked bit (non-zero when set);
 * IS_OK is 1 when none of the selected error bits is set.
 */
#define	IS_CMPE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE : MAC_CNTL_MI_CMPE))
#define	IS_UE(cntl, f)		\
	((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE))
#define	IS_CE(cntl, f)		\
	((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE))
#define	IS_OK(cntl, f)		\
	(((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : MAC_CNTL_MI_ERRS)) == 0)
1562 
1563 
1564 static int
1565 IS_CE_ONLY(uint32_t cntl, int ptrl_error)
1566 {
1567 	if (ptrl_error) {
1568 		return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE);
1569 	} else {
1570 		return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE);
1571 	}
1572 }
1573 
1574 void
1575 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value)
1576 {
1577 	int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
1578 
1579 	if (mcp->mc_speedup_period[ebank] > 0)
1580 		value |= mc_max_speed;
1581 	else
1582 		value |= mcp->mc_speed;
1583 	ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value);
1584 }
1585 
1586 static void
1587 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1588 {
1589 	flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1590 	    MAC_CNTL_PTRL_ERRS;
1591 	flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank));
1592 	flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank));
1593 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1594 	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1595 	flt_stat->mf_flt_maddr.ma_bank = bank;
1596 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1597 }
1598 
1599 static void
1600 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1601 {
1602 	uint32_t status, old_status;
1603 
1604 	status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS;
1605 	old_status = 0;
1606 
1607 	/* we keep reading until the status is stable */
1608 	while (old_status != status) {
1609 		old_status = status;
1610 		flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank));
1611 		flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank));
1612 		status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1613 		    MAC_CNTL_MI_ERRS;
1614 		if (status == old_status) {
1615 			break;
1616 		}
1617 	}
1618 
1619 	flt_stat->mf_cntl = status;
1620 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1621 	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1622 	flt_stat->mf_flt_maddr.ma_bank = bank;
1623 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1624 }
1625 
1626 
1627 /*
1628  * Error philosophy for mirror mode:
1629  *
1630  * PTRL (The error address for both banks are same, since ptrl stops if it
1631  * detects error.)
1632  * - Compare error  log CMPE.
1633  *
1634  * - UE-UE           Report MUE.  No rewrite.
1635  *
1636  * - UE-*	     UE-(CE/OK). Rewrite to scrub UE.  Report SUE.
1637  *
1638  * - CE-*            CE-(CE/OK). Scrub to determine if CE is permanent.
1639  *                   If CE is permanent, inform SCF.  Once for each
1640  *		     Dimm.  If CE becomes UE or CMPE, go back to above.
1641  *
1642  *
1643  * MI (The error addresses for each bank are the same or different.)
1644  * - Compare  error  If addresses are the same.  Just CMPE, so log CMPE.
1645  *		     If addresses are different (this could happen
1646  *		     as a result of scrubbing.  Report each separately.
1647  *		     Only report error info on each side.
1648  *
1649  * - UE-UE           Addresses are the same.  Report MUE.
1650  *		     Addresses are different.  Report SUE on each bank.
1651  *		     Rewrite to clear UE.
1652  *
1653  * - UE-*	     UE-(CE/OK)
1654  *		     Rewrite to clear UE.  Report SUE for the bank.
1655  *
1656  * - CE-*            CE-(CE/OK).  Scrub to determine if CE is permanent.
1657  *                   If CE becomes UE or CMPE, go back to above.
1658  *
1659  */
1660 
1661 static int
1662 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat)
1663 {
1664 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1665 	int i;
1666 	int rv = 0;
1667 	int bank;
1668 	int rewrite_timeout = 0;
1669 
1670 	MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n",
1671 	    flt_stat[0].mf_cntl, flt_stat[1].mf_cntl);
1672 
1673 	if (ptrl_error) {
1674 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1675 		    MAC_CNTL_PTRL_ERRS) == 0)
1676 			return (0);
1677 	} else {
1678 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1679 		    MAC_CNTL_MI_ERRS) == 0)
1680 			return (0);
1681 	}
1682 
1683 	/*
1684 	 * First we take care of the case of CE
1685 	 * because they can become UE or CMPE
1686 	 */
1687 	for (i = 0; i < 2; i++) {
1688 		if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) {
1689 			bank = flt_stat[i].mf_flt_maddr.ma_bank;
1690 			MC_LOG("CE detected on bank %d\n", bank);
1691 			mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error);
1692 			if (MC_REWRITE_ACTIVE(mcp, bank)) {
1693 				rewrite_timeout = 1;
1694 			}
1695 			rv = 1;
1696 		}
1697 	}
1698 
1699 	if (rewrite_timeout)
1700 		return (0);
1701 
1702 	/* The above scrubbing can turn CE into UE or CMPE */
1703 
1704 	/*
1705 	 * Now we distinguish two cases: same address or not
1706 	 * the same address.  It might seem more intuitive to
1707 	 * distinguish PTRL v.s. MI error but it is more
1708 	 * complicated that way.
1709 	 */
1710 
1711 	if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) {
1712 
1713 		if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) ||
1714 		    IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) {
1715 			flt_stat[0].mf_type = FLT_TYPE_CMPE;
1716 			flt_stat[1].mf_type = FLT_TYPE_CMPE;
1717 			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1718 			mc_aflt->mflt_nflts = 2;
1719 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1720 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1721 			mc_aflt->mflt_pr = PR_UE;
1722 			/*
1723 			 * Compare error is result of MAC internal error, so
1724 			 * simply log it instead of publishing an ereport. SCF
1725 			 * diagnoses all the MAC internal and its i/f error.
1726 			 * mc_err_drain(mc_aflt);
1727 			 */
1728 			MC_LOG("cmpe error detected\n");
1729 			return (1);
1730 		}
1731 
1732 		if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) &&
1733 		    IS_UE(flt_stat[1].mf_cntl, ptrl_error)) {
1734 			/* Both side are UE's */
1735 
1736 			MAC_SET_ERRLOG_INFO(&flt_stat[0]);
1737 			MAC_SET_ERRLOG_INFO(&flt_stat[1]);
1738 			MC_LOG("MUE detected\n");
1739 			flt_stat[0].mf_type = FLT_TYPE_MUE;
1740 			flt_stat[1].mf_type = FLT_TYPE_MUE;
1741 			mc_aflt->mflt_erpt_class = MC_OPL_MUE;
1742 			mc_aflt->mflt_nflts = 2;
1743 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1744 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1745 			mc_aflt->mflt_pr = PR_UE;
1746 			mc_err_drain(mc_aflt);
1747 			return (1);
1748 		}
1749 
1750 		/* Now the only case is UE/CE, UE/OK, or don't care */
1751 		for (i = 0; i < 2; i++) {
1752 			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1753 
1754 			/* rewrite can clear the one side UE error */
1755 
1756 			if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) {
1757 				(void) do_rewrite(mcp,
1758 				    flt_stat[i].mf_flt_maddr.ma_bank,
1759 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0);
1760 			}
1761 			flt_stat[i].mf_type = FLT_TYPE_UE;
1762 			MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1763 			mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1764 			mc_aflt->mflt_stat[0] = &flt_stat[i];
1765 			mc_aflt->mflt_nflts = 1;
1766 			mc_aflt->mflt_pr = PR_MCE;
1767 			mc_err_drain(mc_aflt);
1768 			/* Once we hit a UE/CE or UE/OK case, done */
1769 			return (1);
1770 			}
1771 		}
1772 
1773 	} else {
1774 		/*
1775 		 * addresses are different. That means errors
1776 		 * on the 2 banks are not related at all.
1777 		 */
1778 		for (i = 0; i < 2; i++) {
1779 			if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) {
1780 				flt_stat[i].mf_type = FLT_TYPE_CMPE;
1781 				mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1782 				mc_aflt->mflt_nflts = 1;
1783 				mc_aflt->mflt_stat[0] = &flt_stat[i];
1784 				mc_aflt->mflt_pr = PR_UE;
1785 				/*
1786 				 * Compare error is result of MAC internal
1787 				 * error, so simply log it instead of
1788 				 * publishing an ereport. SCF diagnoses all
1789 				 * the MAC internal and its interface error.
1790 				 * mc_err_drain(mc_aflt);
1791 				 */
1792 				MC_LOG("cmpe error detected\n");
1793 				/* no more report on this bank */
1794 				flt_stat[i].mf_cntl = 0;
1795 				rv = 1;
1796 			}
1797 		}
1798 
1799 		/* rewrite can clear the one side UE error */
1800 
1801 		for (i = 0; i < 2; i++) {
1802 			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1803 				(void) do_rewrite(mcp,
1804 				    flt_stat[i].mf_flt_maddr.ma_bank,
1805 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr,
1806 				    0);
1807 				flt_stat[i].mf_type = FLT_TYPE_UE;
1808 				MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1809 				mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1810 				mc_aflt->mflt_stat[0] = &flt_stat[i];
1811 				mc_aflt->mflt_nflts = 1;
1812 				mc_aflt->mflt_pr = PR_MCE;
1813 				mc_err_drain(mc_aflt);
1814 				rv = 1;
1815 			}
1816 		}
1817 	}
1818 	return (rv);
1819 }
1820 static void
1821 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
1822 {
1823 	mc_aflt_t mc_aflt;
1824 	mc_flt_stat_t flt_stat[2], mi_flt_stat[2];
1825 	int i;
1826 	int mi_valid;
1827 
1828 	ASSERT(rsaddr);
1829 
1830 	bzero(&mc_aflt, sizeof (mc_aflt_t));
1831 	bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t));
1832 	bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t));
1833 
1834 
1835 	mc_aflt.mflt_mcp = mcp;
1836 	mc_aflt.mflt_id = gethrtime();
1837 
1838 	/* Now read all the registers into flt_stat */
1839 
1840 	for (i = 0; i < 2; i++) {
1841 		MC_LOG("Reading registers of bank %d\n", bank);
1842 		/* patrol registers */
1843 		mc_read_ptrl_reg(mcp, bank, &flt_stat[i]);
1844 
1845 		/*
1846 		 * In mirror mode, it is possible that only one bank
1847 		 * may report the error. We need to check for it to
1848 		 * ensure we pick the right addr value for patrol restart.
1849 		 * Note that if both banks reported errors, we pick the
1850 		 * 2nd one. Both banks should reported the same error address.
1851 		 */
1852 		if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS)
1853 			rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr;
1854 
1855 		MC_LOG("ptrl registers cntl %x add %x log %x\n",
1856 		    flt_stat[i].mf_cntl, flt_stat[i].mf_err_add,
1857 		    flt_stat[i].mf_err_log);
1858 
1859 		/* MI registers */
1860 		mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]);
1861 
1862 		MC_LOG("MI registers cntl %x add %x log %x\n",
1863 		    mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add,
1864 		    mi_flt_stat[i].mf_err_log);
1865 
1866 		bank = bank^1;
1867 	}
1868 
1869 	/* clear errors once we read all the registers */
1870 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1871 
1872 	MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1873 
1874 	/* Process MI errors first */
1875 
1876 	/* if not error mode, cntl1 is 0 */
1877 	if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1878 	    (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1879 		mi_flt_stat[0].mf_cntl = 0;
1880 
1881 	if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1882 	    (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1883 		mi_flt_stat[1].mf_cntl = 0;
1884 
1885 	mc_aflt.mflt_is_ptrl = 0;
1886 	mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]);
1887 
1888 	if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1889 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl &
1890 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1891 	    (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) &&
1892 	    (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1893 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl &
1894 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1895 	    (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) {
1896 #ifdef DEBUG
1897 		MC_LOG("discarding PTRL error because "
1898 		    "it is the same as MI\n");
1899 #endif
1900 		rsaddr->mi_valid = mi_valid;
1901 		return;
1902 	}
1903 	/* if not error mode, cntl1 is 0 */
1904 	if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1905 	    (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1906 		flt_stat[0].mf_cntl = 0;
1907 
1908 	if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1909 	    (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1910 		flt_stat[1].mf_cntl = 0;
1911 
1912 	mc_aflt.mflt_is_ptrl = 1;
1913 	rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]);
1914 }
1915 static int
1916 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt,
1917 	mc_flt_stat_t *flt_stat)
1918 {
1919 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1920 	int rv = 0;
1921 
1922 	mc_aflt->mflt_erpt_class = NULL;
1923 	if (IS_UE(flt_stat->mf_cntl, ptrl_error)) {
1924 		MC_LOG("UE detected\n");
1925 		flt_stat->mf_type = FLT_TYPE_UE;
1926 		mc_aflt->mflt_erpt_class = MC_OPL_UE;
1927 		mc_aflt->mflt_pr = PR_UE;
1928 		MAC_SET_ERRLOG_INFO(flt_stat);
1929 		rv = 1;
1930 	} else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) {
1931 		MC_LOG("CE detected\n");
1932 		MAC_SET_ERRLOG_INFO(flt_stat);
1933 
1934 		/* Error type can change after scrubbing */
1935 		mc_scrub_ce(mcp, bank, flt_stat, ptrl_error);
1936 		if (MC_REWRITE_ACTIVE(mcp, bank)) {
1937 			return (0);
1938 		}
1939 
1940 		if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) {
1941 			mc_aflt->mflt_erpt_class = MC_OPL_ICE;
1942 			mc_aflt->mflt_pr = PR_MCE;
1943 		} else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1944 			mc_aflt->mflt_erpt_class = MC_OPL_CE;
1945 			mc_aflt->mflt_pr = PR_MCE;
1946 		} else if (flt_stat->mf_type == FLT_TYPE_UE) {
1947 			mc_aflt->mflt_erpt_class = MC_OPL_UE;
1948 			mc_aflt->mflt_pr = PR_UE;
1949 		}
1950 		rv = 1;
1951 	}
1952 	MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type,
1953 	    mc_aflt->mflt_erpt_class);
1954 	if (mc_aflt->mflt_erpt_class) {
1955 		mc_aflt->mflt_stat[0] = flt_stat;
1956 		mc_aflt->mflt_nflts = 1;
1957 		mc_err_drain(mc_aflt);
1958 	}
1959 	return (rv);
1960 }
1961 
1962 static void
1963 mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
1964 {
1965 	mc_aflt_t mc_aflt;
1966 	mc_flt_stat_t flt_stat, mi_flt_stat;
1967 	int mi_valid;
1968 
1969 	bzero(&mc_aflt, sizeof (mc_aflt_t));
1970 	bzero(&flt_stat, sizeof (mc_flt_stat_t));
1971 	bzero(&mi_flt_stat, sizeof (mc_flt_stat_t));
1972 
1973 	mc_aflt.mflt_mcp = mcp;
1974 	mc_aflt.mflt_id = gethrtime();
1975 
1976 	/* patrol registers */
1977 	mc_read_ptrl_reg(mcp, bank, &flt_stat);
1978 
1979 	ASSERT(rsaddr);
1980 	rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr;
1981 
1982 	MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl,
1983 	    flt_stat.mf_err_add, flt_stat.mf_err_log);
1984 
1985 	/* MI registers */
1986 	mc_read_mi_reg(mcp, bank, &mi_flt_stat);
1987 
1988 
1989 	MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl,
1990 	    mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log);
1991 
1992 	/* clear errors once we read all the registers */
1993 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1994 
1995 	mc_aflt.mflt_is_ptrl = 0;
1996 	if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) &&
1997 	    ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
1998 	    ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
1999 		mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat);
2000 	}
2001 
2002 	if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >>
2003 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl &
2004 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
2005 	    (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) {
2006 #ifdef DEBUG
2007 		MC_LOG("discarding PTRL error because "
2008 		    "it is the same as MI\n");
2009 #endif
2010 		rsaddr->mi_valid = mi_valid;
2011 		return;
2012 	}
2013 
2014 	mc_aflt.mflt_is_ptrl = 1;
2015 	if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) &&
2016 	    ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2017 	    ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2018 		rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt,
2019 		    &flt_stat);
2020 	}
2021 }
2022 /*
2023  *	memory patrol error handling algorithm:
2024  *	timeout() is used to do periodic polling
2025  *	This is the flow chart.
2026  *	timeout ->
2027  *	mc_check_errors()
2028  *	    if memory bank is installed, read the status register
2029  *	    if any error bit is set,
2030  *	    -> mc_error_handler()
2031  *		-> read all error registers
2032  *	        -> mc_process_error()
2033  *	            determine error type
2034  *	            rewrite to clear error or scrub to determine CE type
2035  *	            inform SCF on permanent CE
2036  *	        -> mc_err_drain
2037  *	            page offline processing
2038  *	            -> mc_ereport_post()
2039  */
2040 
2041 static void
2042 mc_process_rewrite(mc_opl_t *mcp, int bank)
2043 {
2044 	uint32_t rew_addr, cntl;
2045 	mc_retry_info_t *retry;
2046 	struct mc_bank *bankp;
2047 
2048 	bankp = &(mcp->mc_bank[bank]);
2049 	retry = bankp->mcb_active;
2050 	if (retry == NULL)
2051 		return;
2052 
2053 	if (retry->ri_state <= RETRY_STATE_ACTIVE) {
2054 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
2055 		if (cntl & MAC_CNTL_PTRL_STATUS)
2056 			return;
2057 		rew_addr = retry->ri_addr;
2058 		ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr);
2059 		MAC_REW_REQ(mcp, bank);
2060 
2061 		retry->ri_state = RETRY_STATE_REWRITE;
2062 	}
2063 
2064 	cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank));
2065 
2066 	if (cntl & MAC_CNTL_REW_END) {
2067 		MAC_CLEAR_ERRS(mcp, bank,
2068 		    MAC_CNTL_REW_ERRS);
2069 		mc_clear_rewrite(mcp, bank);
2070 	} else {
2071 		/*
2072 		 * If the rewrite does not complete in
2073 		 * 1 hour, we have to consider this a HW
2074 		 * failure.  However, there is no recovery
2075 		 * mechanism.  The only thing we can do
2076 		 * to to print a warning message to the
2077 		 * console.  We continue to increment the
2078 		 * counter but we only print the message
2079 		 * once.  It will take the counter a long
2080 		 * time to wrap around and the user might
2081 		 * see a second message.  In practice,
2082 		 * we have never hit this condition but
2083 		 * we have to keep the code here just in case.
2084 		 */
2085 		if (++mcp->mc_bank[bank].mcb_rewrite_count
2086 		    == mc_max_rewrite_retry) {
2087 			cmn_err(CE_WARN, "Memory patrol feature is"
2088 			" partly suspended on /LSB%d/B%d"
2089 			" due to heavy memory load,"
2090 			" and it will restart"
2091 			" automatically.\n", mcp->mc_board_num,
2092 			    bank);
2093 		}
2094 	}
2095 }
2096 
2097 static void
2098 mc_check_errors_func(mc_opl_t *mcp)
2099 {
2100 	mc_rsaddr_info_t rsaddr_info;
2101 	int i, error_count = 0;
2102 	uint32_t stat, cntl;
2103 	int running;
2104 	int wrapped;
2105 	int ebk;
2106 
2107 	/*
2108 	 * scan errors.
2109 	 */
2110 	if (mcp->mc_status & MC_MEMORYLESS)
2111 		return;
2112 
2113 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2114 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2115 			if (MC_REWRITE_ACTIVE(mcp, i)) {
2116 				mc_process_rewrite(mcp, i);
2117 			}
2118 			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
2119 			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
2120 			running = cntl & MAC_CNTL_PTRL_START;
2121 			wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX;
2122 
2123 			/* Compute the effective bank idx */
2124 			ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i;
2125 
2126 			if (mc_debug_show_all || stat) {
2127 				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
2128 				    mcp->mc_board_num, i, stat, cntl);
2129 			}
2130 
2131 			/*
2132 			 * Update stats and reset flag if the HW patrol
2133 			 * wrapped around in its scan.
2134 			 */
2135 			if (wrapped) {
2136 				MAC_CLEAR_MAX(mcp, i);
2137 				mcp->mc_period[ebk]++;
2138 				if (IS_MIRROR(mcp, i))
2139 					MC_LOG("mirror mc period %ld on "
2140 					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
2141 					    mcp->mc_board_num, i);
2142 				else {
2143 					MC_LOG("mc period %ld on "
2144 					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
2145 					    mcp->mc_board_num, i);
2146 				}
2147 			}
2148 
2149 			if (running) {
2150 				/*
2151 				 * Mac patrol HW is still running.
2152 				 * Normally when an error is detected,
2153 				 * the HW patrol will stop so that we
2154 				 * can collect error data for reporting.
2155 				 * Certain errors (MI errors) detected may not
2156 				 * cause the HW patrol to stop which is a
2157 				 * problem since we cannot read error data while
2158 				 * the HW patrol is running. SW is not allowed
2159 				 * to stop the HW patrol while it is running
2160 				 * as it may cause HW inconsistency. This is
2161 				 * described in a HW errata.
2162 				 * In situations where we detected errors
2163 				 * that may not cause the HW patrol to stop.
2164 				 * We speed up the HW patrol scanning in
2165 				 * the hope that it will find the 'real' PTRL
2166 				 * errors associated with the previous errors
2167 				 * causing the HW to finally stop so that we
2168 				 * can do the reporting.
2169 				 */
2170 				/*
2171 				 * Check to see if we did speed up
2172 				 * the HW patrol due to previous errors
2173 				 * detected that did not cause the patrol
2174 				 * to stop. We only do it if HW patrol scan
2175 				 * wrapped (counted as completing a 'period').
2176 				 */
2177 				if (mcp->mc_speedup_period[ebk] > 0) {
2178 					if (wrapped &&
2179 					    (--mcp->mc_speedup_period[ebk] ==
2180 					    0)) {
2181 						/*
2182 						 * We did try to speed up.
2183 						 * The speed up period has
2184 						 * expired and the HW patrol
2185 						 * is still running.  The
2186 						 * errors must be intermittent.
2187 						 * We have no choice but to
2188 						 * ignore them, reset the scan
2189 						 * speed to normal and clear
2190 						 * the MI error bits. For
2191 						 * mirror mode, we need to
2192 						 * clear errors on both banks.
2193 						 */
2194 						MC_LOG("Clearing MI errors\n");
2195 						MAC_CLEAR_ERRS(mcp, i,
2196 						    MAC_CNTL_MI_ERRS);
2197 
2198 						if (IS_MIRROR(mcp, i)) {
2199 							MC_LOG("Clearing "
2200 							    "Mirror MI errs\n");
2201 							MAC_CLEAR_ERRS(mcp,
2202 							    i^1,
2203 							    MAC_CNTL_MI_ERRS);
2204 						}
2205 					}
2206 				} else if (stat & MAC_STAT_MI_ERRS) {
2207 					/*
2208 					 * MI errors detected but we cannot
2209 					 * report them since the HW patrol
2210 					 * is still running.
2211 					 * We will attempt to speed up the
2212 					 * scanning and hopefully the HW
2213 					 * can detect PRTL errors at the same
2214 					 * location that cause the HW patrol
2215 					 * to stop.
2216 					 */
2217 					mcp->mc_speedup_period[ebk] = 2;
2218 					MAC_CMD(mcp, i, 0);
2219 				}
2220 			} else if (stat & (MAC_STAT_PTRL_ERRS |
2221 			    MAC_STAT_MI_ERRS)) {
2222 				/*
2223 				 * HW Patrol has stopped and we found errors.
2224 				 * Proceed to collect and report error info.
2225 				 */
2226 				mcp->mc_speedup_period[ebk] = 0;
2227 				rsaddr_info.mi_valid = 0;
2228 				rsaddr_info.mi_injectrestart = 0;
2229 				if (IS_MIRROR(mcp, i)) {
2230 					mc_error_handler_mir(mcp, i,
2231 					    &rsaddr_info);
2232 				} else {
2233 					mc_error_handler(mcp, i, &rsaddr_info);
2234 				}
2235 
2236 				error_count++;
2237 				restart_patrol(mcp, i, &rsaddr_info);
2238 			} else {
2239 				/*
2240 				 * HW patrol scan has apparently stopped
2241 				 * but no errors detected/flagged.
2242 				 * Restart the HW patrol just to be sure.
2243 				 * In mirror mode, the odd bank might have
2244 				 * reported errors that caused the patrol to
2245 				 * stop. We'll defer the restart to the odd
2246 				 * bank in this case.
2247 				 */
2248 				if (!IS_MIRROR(mcp, i) || (i & 0x1))
2249 					restart_patrol(mcp, i, NULL);
2250 			}
2251 		}
2252 	}
2253 	if (error_count > 0)
2254 		mcp->mc_last_error += error_count;
2255 	else
2256 		mcp->mc_last_error = 0;
2257 }
2258 
2259 /*
2260  * mc_polling -- Check errors for only one instance,
2261  * but process errors for all instances to make sure we drain the errors
2262  * faster than they can be accumulated.
2263  *
2264  * Polling on each board should be done only once per each
2265  * mc_patrol_interval_sec.  This is equivalent to setting mc_tick_left
2266  * to OPL_MAX_BOARDS and decrement by 1 on each timeout.
2267  * Once mc_tick_left becomes negative, the board becomes a candidate
2268  * for polling because it has waited for at least
2269  * mc_patrol_interval_sec's long.    If mc_timeout_period is calculated
2270  * differently, this has to be updated accordingly.
2271  */
2272 
2273 static void
2274 mc_polling(void)
2275 {
2276 	int i, scan_error;
2277 	mc_opl_t *mcp;
2278 
2279 
2280 	scan_error = 1;
2281 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2282 		mutex_enter(&mcmutex);
2283 		if ((mcp = mc_instances[i]) == NULL) {
2284 			mutex_exit(&mcmutex);
2285 			continue;
2286 		}
2287 		mutex_enter(&mcp->mc_lock);
2288 		mutex_exit(&mcmutex);
2289 		if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2290 			mutex_exit(&mcp->mc_lock);
2291 			continue;
2292 		}
2293 		if (scan_error && mcp->mc_tick_left <= 0) {
2294 			mc_check_errors_func((void *)mcp);
2295 			mcp->mc_tick_left = OPL_MAX_BOARDS;
2296 			scan_error = 0;
2297 		} else {
2298 			mcp->mc_tick_left--;
2299 		}
2300 		mc_process_scf_log(mcp);
2301 		mutex_exit(&mcp->mc_lock);
2302 	}
2303 }
2304 
2305 static void
2306 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr)
2307 {
2308 	maddr->ma_bd = mcp->mc_board_num;
2309 	maddr->ma_bank = bank;
2310 	maddr->ma_dimm_addr = 0;
2311 }
2312 
2313 typedef struct mc_mem_range {
2314 	uint64_t	addr;
2315 	uint64_t	size;
2316 } mc_mem_range_t;
2317 
2318 static int
2319 get_base_address(mc_opl_t *mcp)
2320 {
2321 	mc_mem_range_t *mem_range;
2322 	int len;
2323 
2324 	if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2325 	    "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) {
2326 		return (DDI_FAILURE);
2327 	}
2328 
2329 	mcp->mc_start_address = mem_range->addr;
2330 	mcp->mc_size = mem_range->size;
2331 
2332 	kmem_free(mem_range, len);
2333 	return (DDI_SUCCESS);
2334 }
2335 
/*
 * One entry of the "mc-addr" property: a bank number and the physical
 * address of that bank's MAC registers, split into two 32-bit halves.
 */
struct mc_addr_spec {
	uint32_t bank;
	uint32_t phys_hi;
	uint32_t phys_lo;
};

/*
 * Combine phys_hi/phys_lo of entry i of the mc_addr_spec array m into a
 * 64-bit physical address.  Both arguments are parenthesized so that
 * expressions such as REGS_PA(p + 1, n - 1) expand correctly.
 */
#define	REGS_PA(m, i) \
	((((uint64_t)(m)[(i)].phys_hi) << 32) | (m)[(i)].phys_lo)

/* Property names of the translation tables, indexed by chip select. */
static char *mc_tbl_name[] = {
	"cs0-mc-pa-trans-table",
	"cs1-mc-pa-trans-table"
};
2348 
2349 /*
2350  * This routine performs a rangecheck for a given PA
2351  * to see if it belongs to the memory range for this board.
2352  * Return 1 if it is valid (within the range) and 0 otherwise
2353  */
2354 static int
2355 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa)
2356 {
2357 	if ((pa < mcp->mc_start_address) || (mcp->mc_start_address +
2358 	    mcp->mc_size <= pa))
2359 		return (0);
2360 	else
2361 		return (1);
2362 }
2363 
2364 static void
2365 mc_memlist_delete(struct memlist *mlist)
2366 {
2367 	struct memlist *ml;
2368 
2369 	for (ml = mlist; ml; ml = mlist) {
2370 		mlist = ml->next;
2371 		kmem_free(ml, sizeof (struct memlist));
2372 	}
2373 }
2374 
2375 static struct memlist *
2376 mc_memlist_dup(struct memlist *mlist)
2377 {
2378 	struct memlist *hl = NULL, *tl, **mlp;
2379 
2380 	if (mlist == NULL)
2381 		return (NULL);
2382 
2383 	mlp = &hl;
2384 	tl = *mlp;
2385 	for (; mlist; mlist = mlist->next) {
2386 		*mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP);
2387 		(*mlp)->address = mlist->address;
2388 		(*mlp)->size = mlist->size;
2389 		(*mlp)->prev = tl;
2390 		tl = *mlp;
2391 		mlp = &((*mlp)->next);
2392 	}
2393 	*mlp = NULL;
2394 
2395 	return (hl);
2396 }
2397 
2398 
2399 static struct memlist *
2400 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
2401 {
2402 	uint64_t	end;
2403 	struct memlist	*ml, *tl, *nlp;
2404 
2405 	if (mlist == NULL)
2406 		return (NULL);
2407 
2408 	end = base + len;
2409 	if ((end <= mlist->address) || (base == end))
2410 		return (mlist);
2411 
2412 	for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
2413 		uint64_t	mend;
2414 
2415 		nlp = ml->next;
2416 
2417 		if (end <= ml->address)
2418 			break;
2419 
2420 		mend = ml->address + ml->size;
2421 		if (base < mend) {
2422 			if (base <= ml->address) {
2423 				ml->address = end;
2424 				if (end >= mend)
2425 					ml->size = 0ull;
2426 				else
2427 					ml->size = mend - ml->address;
2428 			} else {
2429 				ml->size = base - ml->address;
2430 				if (end < mend) {
2431 					struct memlist	*nl;
2432 					/*
2433 					 * splitting an memlist entry.
2434 					 */
2435 					nl = kmem_alloc(sizeof (struct memlist),
2436 					    KM_SLEEP);
2437 					nl->address = end;
2438 					nl->size = mend - nl->address;
2439 					if ((nl->next = nlp) != NULL)
2440 						nlp->prev = nl;
2441 					nl->prev = ml;
2442 					ml->next = nl;
2443 					nlp = nl;
2444 				}
2445 			}
2446 			if (ml->size == 0ull) {
2447 				if (ml == mlist) {
2448 					if ((mlist = nlp) != NULL)
2449 						nlp->prev = NULL;
2450 					kmem_free(ml, sizeof (struct memlist));
2451 					if (mlist == NULL)
2452 						break;
2453 					ml = nlp;
2454 				} else {
2455 					if ((tl->next = nlp) != NULL)
2456 						nlp->prev = tl;
2457 					kmem_free(ml, sizeof (struct memlist));
2458 					ml = tl;
2459 				}
2460 			}
2461 		}
2462 	}
2463 
2464 	return (mlist);
2465 }
2466 
2467 static void
2468 mc_get_mlist(mc_opl_t *mcp)
2469 {
2470 	struct memlist *mlist;
2471 
2472 	memlist_read_lock();
2473 	mlist = mc_memlist_dup(phys_install);
2474 	memlist_read_unlock();
2475 
2476 	if (mlist) {
2477 		mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address);
2478 	}
2479 
2480 	if (mlist) {
2481 		uint64_t startpa, endpa;
2482 
2483 		startpa = mcp->mc_start_address + mcp->mc_size;
2484 		endpa = ptob(physmax + 1);
2485 		if (endpa > startpa) {
2486 			mlist = mc_memlist_del_span(mlist, startpa,
2487 			    endpa - startpa);
2488 		}
2489 	}
2490 
2491 	if (mlist) {
2492 		mcp->mlist = mlist;
2493 	}
2494 }
2495 
2496 int
2497 mc_board_add(mc_opl_t *mcp)
2498 {
2499 	struct mc_addr_spec *macaddr;
2500 	cs_status_t *cs_status;
2501 	int len, len1, i, bk, cc;
2502 	mc_rsaddr_info_t rsaddr;
2503 	uint32_t mirr;
2504 	int nbanks = 0;
2505 	uint64_t nbytes = 0;
2506 	int mirror_mode = 0;
2507 	int ret;
2508 
2509 	/*
2510 	 * Get configurations from "pseudo-mc" node which includes:
2511 	 * board# : LSB number
2512 	 * mac-addr : physical base address of MAC registers
2513 	 * csX-mac-pa-trans-table: translation table from DIMM address
2514 	 *			to physical address or vice versa.
2515 	 */
2516 	mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip,
2517 	    DDI_PROP_DONTPASS, "board#", -1);
2518 
2519 	if (mcp->mc_board_num == -1) {
2520 		return (DDI_FAILURE);
2521 	}
2522 
2523 	/*
2524 	 * Get start address in this CAB. It can be gotten from
2525 	 * "sb-mem-ranges" property.
2526 	 */
2527 
2528 	if (get_base_address(mcp) == DDI_FAILURE) {
2529 		return (DDI_FAILURE);
2530 	}
2531 	/* get mac-pa trans tables */
2532 	for (i = 0; i < MC_TT_CS; i++) {
2533 		len = MC_TT_ENTRIES;
2534 		cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip,
2535 		    DDI_PROP_DONTPASS, mc_tbl_name[i],
2536 		    (caddr_t)mcp->mc_trans_table[i], &len);
2537 
2538 		if (cc != DDI_SUCCESS) {
2539 			bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES);
2540 		}
2541 	}
2542 	mcp->mlist = NULL;
2543 
2544 	mc_get_mlist(mcp);
2545 
2546 	/* initialize bank informations */
2547 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2548 	    "mc-addr", (caddr_t)&macaddr, &len);
2549 	if (cc != DDI_SUCCESS) {
2550 		cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc);
2551 		return (DDI_FAILURE);
2552 	}
2553 
2554 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2555 	    "cs-status", (caddr_t)&cs_status, &len1);
2556 
2557 	if (cc != DDI_SUCCESS) {
2558 		if (len > 0)
2559 			kmem_free(macaddr, len);
2560 		cmn_err(CE_WARN, "Cannot get cs-status. err=%d\n", cc);
2561 		return (DDI_FAILURE);
2562 	}
2563 	/* get the physical board number for a given logical board number */
2564 	mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num);
2565 
2566 	if (mcp->mc_phys_board_num < 0) {
2567 		if (len > 0)
2568 			kmem_free(macaddr, len);
2569 		cmn_err(CE_WARN, "Unable to obtain the physical board number");
2570 		return (DDI_FAILURE);
2571 	}
2572 
2573 	mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL);
2574 
2575 	for (i = 0; i < len1 / sizeof (cs_status_t); i++) {
2576 		nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) |
2577 		    ((uint64_t)cs_status[i].cs_avail_low);
2578 	}
2579 	if (len1 > 0)
2580 		kmem_free(cs_status, len1);
2581 	nbanks = len / sizeof (struct mc_addr_spec);
2582 
2583 	if (nbanks > 0)
2584 		nbytes /= nbanks;
2585 	else {
2586 		/* No need to free macaddr because len must be 0 */
2587 		mcp->mc_status |= MC_MEMORYLESS;
2588 		return (DDI_SUCCESS);
2589 	}
2590 
2591 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2592 		mcp->mc_scf_retry[i] = 0;
2593 		mcp->mc_period[i] = 0;
2594 		mcp->mc_speedup_period[i] = 0;
2595 	}
2596 
2597 	/*
2598 	 * Get the memory size here. Let it be B (bytes).
2599 	 * Let T be the time in u.s. to scan 64 bytes.
2600 	 * If we want to complete 1 round of scanning in P seconds.
2601 	 *
2602 	 *	B * T * 10^(-6)	= P
2603 	 *	---------------
2604 	 *		64
2605 	 *
2606 	 *	T = P * 64 * 10^6
2607 	 *	    -------------
2608 	 *		B
2609 	 *
2610 	 *	  = P * 64 * 10^6
2611 	 *	    -------------
2612 	 *		B
2613 	 *
2614 	 *	The timing bits are set in PTRL_CNTL[28:26] where
2615 	 *
2616 	 *	0	- 1 m.s
2617 	 *	1	- 512 u.s.
2618 	 *	10	- 256 u.s.
2619 	 *	11	- 128 u.s.
2620 	 *	100	- 64 u.s.
2621 	 *	101	- 32 u.s.
2622 	 *	110	- 0 u.s.
2623 	 *	111	- reserved.
2624 	 *
2625 	 *
2626 	 *	a[0] = 110, a[1] = 101, ... a[6] = 0
2627 	 *
2628 	 *	cs-status property is int x 7
2629 	 *	0 - cs#
2630 	 *	1 - cs-status
2631 	 *	2 - cs-avail.hi
2632 	 *	3 - cs-avail.lo
2633 	 *	4 - dimm-capa.hi
2634 	 *	5 - dimm-capa.lo
2635 	 *	6 - #of dimms
2636 	 */
2637 
2638 	if (nbytes > 0) {
2639 		int i;
2640 		uint64_t ms;
2641 		ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes;
2642 		mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds;
2643 		for (i = 0; i < MC_MAX_SPEEDS - 1; i++) {
2644 			if (ms < mc_scan_speeds[i + 1].mc_period) {
2645 				mcp->mc_speed = mc_scan_speeds[i].mc_speeds;
2646 				break;
2647 			}
2648 		}
2649 	} else
2650 		mcp->mc_speed = 0;
2651 
2652 
2653 	for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) {
2654 		struct mc_bank *bankp;
2655 		mc_retry_info_t *retry;
2656 		uint32_t reg;
2657 		int k;
2658 
2659 		/*
2660 		 * setup bank
2661 		 */
2662 		bk = macaddr[i].bank;
2663 		bankp = &(mcp->mc_bank[bk]);
2664 		bankp->mcb_status = BANK_INSTALLED;
2665 		bankp->mcb_reg_base = REGS_PA(macaddr, i);
2666 
2667 		bankp->mcb_retry_freelist = NULL;
2668 		bankp->mcb_retry_pending = NULL;
2669 		bankp->mcb_active = NULL;
2670 		retry = &bankp->mcb_retry_infos[0];
2671 		for (k = 0; k < MC_RETRY_COUNT; k++, retry++) {
2672 			mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
2673 		}
2674 
2675 		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk));
2676 		bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS);
2677 
2678 		/*
2679 		 * check if mirror mode
2680 		 */
2681 		mirr = LD_MAC_REG(MAC_MIRR(mcp, bk));
2682 
2683 		if (mirr & MAC_MIRR_MIRROR_MODE) {
2684 			MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num,
2685 			    bk);
2686 			bankp->mcb_status |= BANK_MIRROR_MODE;
2687 			mirror_mode = 1;
2688 			/*
2689 			 * The following bit is only used for
2690 			 * error injection.  We should clear it
2691 			 */
2692 			if (mirr & MAC_MIRR_BANK_EXCLUSIVE)
2693 				ST_MAC_REG(MAC_MIRR(mcp, bk), 0);
2694 		}
2695 
2696 		/*
2697 		 * restart if not mirror mode or the other bank
2698 		 * of the mirror is not running
2699 		 */
2700 		if (!(mirr & MAC_MIRR_MIRROR_MODE) ||
2701 		    !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) {
2702 			MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num,
2703 			    bk);
2704 			get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr);
2705 			rsaddr.mi_valid = 0;
2706 			rsaddr.mi_injectrestart = 0;
2707 			restart_patrol(mcp, bk, &rsaddr);
2708 		} else {
2709 			MC_LOG("Not starting up /LSB%d/B%d\n",
2710 			    mcp->mc_board_num, bk);
2711 		}
2712 		bankp->mcb_status |= BANK_PTRL_RUNNING;
2713 	}
2714 	if (len > 0)
2715 		kmem_free(macaddr, len);
2716 
2717 	ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode",
2718 	    mirror_mode);
2719 	if (ret != DDI_PROP_SUCCESS) {
2720 		cmn_err(CE_WARN, "Unable to update mirror-mode property");
2721 	}
2722 
2723 	mcp->mc_dimm_list = mc_get_dimm_list(mcp);
2724 
2725 	/*
2726 	 * set interval in HZ.
2727 	 */
2728 	mcp->mc_last_error = 0;
2729 
2730 	/* restart memory patrol checking */
2731 	mcp->mc_status |= MC_POLL_RUNNING;
2732 
2733 	return (DDI_SUCCESS);
2734 }
2735 
2736 int
2737 mc_board_del(mc_opl_t *mcp)
2738 {
2739 	int i;
2740 	scf_log_t *p;
2741 
2742 	/*
2743 	 * cleanup mac state
2744 	 */
2745 	mutex_enter(&mcp->mc_lock);
2746 	if (mcp->mc_status & MC_MEMORYLESS) {
2747 		mutex_exit(&mcp->mc_lock);
2748 		mutex_destroy(&mcp->mc_lock);
2749 		return (DDI_SUCCESS);
2750 	}
2751 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2752 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2753 			mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED;
2754 		}
2755 	}
2756 
2757 	/* stop memory patrol checking */
2758 	mcp->mc_status &= ~MC_POLL_RUNNING;
2759 
2760 	/* just throw away all the scf logs */
2761 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2762 		while ((p = mcp->mc_scf_log[i]) != NULL) {
2763 			mcp->mc_scf_log[i] = p->sl_next;
2764 			mcp->mc_scf_total[i]--;
2765 			kmem_free(p, sizeof (scf_log_t));
2766 		}
2767 	}
2768 
2769 	if (mcp->mlist)
2770 		mc_memlist_delete(mcp->mlist);
2771 
2772 	if (mcp->mc_dimm_list)
2773 		mc_free_dimm_list(mcp->mc_dimm_list);
2774 
2775 	mutex_exit(&mcp->mc_lock);
2776 
2777 	mutex_destroy(&mcp->mc_lock);
2778 	return (DDI_SUCCESS);
2779 }
2780 
2781 int
2782 mc_suspend(mc_opl_t *mcp, uint32_t flag)
2783 {
2784 	/* stop memory patrol checking */
2785 	mutex_enter(&mcp->mc_lock);
2786 	if (mcp->mc_status & MC_MEMORYLESS) {
2787 		mutex_exit(&mcp->mc_lock);
2788 		return (DDI_SUCCESS);
2789 	}
2790 
2791 	mcp->mc_status &= ~MC_POLL_RUNNING;
2792 
2793 	mcp->mc_status |= flag;
2794 	mutex_exit(&mcp->mc_lock);
2795 
2796 	return (DDI_SUCCESS);
2797 }
2798 
2799 void
2800 opl_mc_update_mlist(void)
2801 {
2802 	int i;
2803 	mc_opl_t *mcp;
2804 
2805 	/*
2806 	 * memory information is not updated until
2807 	 * the post attach/detach stage during DR.
2808 	 * This interface is used by dr_mem to inform
2809 	 * mc-opl to update the mlist.
2810 	 */
2811 
2812 	mutex_enter(&mcmutex);
2813 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2814 		if ((mcp = mc_instances[i]) == NULL)
2815 			continue;
2816 		mutex_enter(&mcp->mc_lock);
2817 		if (mcp->mlist)
2818 			mc_memlist_delete(mcp->mlist);
2819 		mcp->mlist = NULL;
2820 		mc_get_mlist(mcp);
2821 		mutex_exit(&mcp->mc_lock);
2822 	}
2823 	mutex_exit(&mcmutex);
2824 }
2825 
2826 /* caller must clear the SUSPEND bits or this will do nothing */
2827 
2828 int
2829 mc_resume(mc_opl_t *mcp, uint32_t flag)
2830 {
2831 	int i;
2832 	uint64_t basepa;
2833 
2834 	mutex_enter(&mcp->mc_lock);
2835 	if (mcp->mc_status & MC_MEMORYLESS) {
2836 		mutex_exit(&mcp->mc_lock);
2837 		return (DDI_SUCCESS);
2838 	}
2839 	basepa = mcp->mc_start_address;
2840 	if (get_base_address(mcp) == DDI_FAILURE) {
2841 		mutex_exit(&mcp->mc_lock);
2842 		return (DDI_FAILURE);
2843 	}
2844 
2845 	if (basepa != mcp->mc_start_address) {
2846 		if (mcp->mlist)
2847 			mc_memlist_delete(mcp->mlist);
2848 		mcp->mlist = NULL;
2849 		mc_get_mlist(mcp);
2850 	}
2851 
2852 	mcp->mc_status &= ~flag;
2853 
2854 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
2855 		mutex_exit(&mcp->mc_lock);
2856 		return (DDI_SUCCESS);
2857 	}
2858 
2859 	if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2860 		/* restart memory patrol checking */
2861 		mcp->mc_status |= MC_POLL_RUNNING;
2862 		for (i = 0; i < BANKNUM_PER_SB; i++) {
2863 			if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2864 				mc_check_errors_func(mcp);
2865 			}
2866 		}
2867 	}
2868 	mutex_exit(&mcp->mc_lock);
2869 
2870 	return (DDI_SUCCESS);
2871 }
2872 
2873 static mc_opl_t *
2874 mc_pa_to_mcp(uint64_t pa)
2875 {
2876 	mc_opl_t *mcp;
2877 	int i;
2878 
2879 	ASSERT(MUTEX_HELD(&mcmutex));
2880 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2881 		if ((mcp = mc_instances[i]) == NULL)
2882 			continue;
2883 		/* if mac patrol is suspended, we cannot rely on it */
2884 		if (!(mcp->mc_status & MC_POLL_RUNNING) ||
2885 		    (mcp->mc_status & MC_SOFT_SUSPENDED))
2886 			continue;
2887 		if (mc_rangecheck_pa(mcp, pa)) {
2888 			return (mcp);
2889 		}
2890 	}
2891 	return (NULL);
2892 }
2893 
2894 /*
2895  * Get Physical Board number from Logical one.
2896  */
2897 static int
2898 mc_opl_get_physical_board(int sb)
2899 {
2900 	if (&opl_get_physical_board) {
2901 		return (opl_get_physical_board(sb));
2902 	}
2903 
2904 	cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n");
2905 	return (-1);
2906 }
2907 
2908 /* ARGSUSED */
2909 int
2910 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen,
2911 	int *lenp)
2912 {
2913 	int i;
2914 	int j;
2915 	int sb;
2916 	int bank;
2917 	int cs;
2918 	mc_opl_t *mcp;
2919 	char memb_num;
2920 
2921 	mutex_enter(&mcmutex);
2922 
2923 	if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) ||
2924 	    (!pa_is_valid(mcp, flt_addr))) {
2925 		mutex_exit(&mcmutex);
2926 		if (snprintf(buf, buflen, "UNKNOWN") >= buflen) {
2927 			return (ENOSPC);
2928 		} else {
2929 			if (lenp)
2930 				*lenp = strlen(buf);
2931 		}
2932 		return (0);
2933 	}
2934 
2935 	bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address);
2936 	sb = mcp->mc_phys_board_num;
2937 	cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address);
2938 
2939 	if (sb == -1) {
2940 		mutex_exit(&mcmutex);
2941 		return (ENXIO);
2942 	}
2943 
2944 	if (plat_model == MODEL_DC) {
2945 		i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
2946 		j = (cs == 0) ? i : i + 2;
2947 		snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
2948 		    model_names[plat_model].unit_name, sb,
2949 		    mc_dc_dimm_unum_table[j],
2950 		    mc_dc_dimm_unum_table[j + 1]);
2951 	} else {
2952 		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
2953 		j = (cs == 0) ? i : i + 2;
2954 		memb_num = mc_ff_dimm_unum_table[i][0];
2955 		snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
2956 		    model_names[plat_model].unit_name,
2957 		    model_names[plat_model].mem_name, memb_num,
2958 		    &mc_ff_dimm_unum_table[j][1],
2959 		    &mc_ff_dimm_unum_table[j + 1][1]);
2960 	}
2961 	if (lenp) {
2962 		*lenp = strlen(buf);
2963 	}
2964 	mutex_exit(&mcmutex);
2965 	return (0);
2966 }
2967 
2968 int
2969 opl_mc_suspend(void)
2970 {
2971 	mc_opl_t *mcp;
2972 	int i;
2973 
2974 	mutex_enter(&mcmutex);
2975 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2976 		if ((mcp = mc_instances[i]) == NULL)
2977 			continue;
2978 		mc_suspend(mcp, MC_SOFT_SUSPENDED);
2979 	}
2980 	mutex_exit(&mcmutex);
2981 
2982 	return (0);
2983 }
2984 
2985 int
2986 opl_mc_resume(void)
2987 {
2988 	mc_opl_t *mcp;
2989 	int i;
2990 
2991 	mutex_enter(&mcmutex);
2992 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2993 		if ((mcp = mc_instances[i]) == NULL)
2994 			continue;
2995 		mc_resume(mcp, MC_SOFT_SUSPENDED);
2996 	}
2997 	mutex_exit(&mcmutex);
2998 
2999 	return (0);
3000 }
3001 static void
3002 insert_mcp(mc_opl_t *mcp)
3003 {
3004 	mutex_enter(&mcmutex);
3005 	if (mc_instances[mcp->mc_board_num] != NULL) {
3006 		MC_LOG("mc-opl instance for board# %d already exists\n",
3007 		    mcp->mc_board_num);
3008 	}
3009 	mc_instances[mcp->mc_board_num] = mcp;
3010 	mutex_exit(&mcmutex);
3011 }
3012 
3013 static void
3014 delete_mcp(mc_opl_t *mcp)
3015 {
3016 	mutex_enter(&mcmutex);
3017 	mc_instances[mcp->mc_board_num] = 0;
3018 	mutex_exit(&mcmutex);
3019 }
3020 
3021 /* Error injection interface */
3022 
3023 static void
3024 mc_lock_va(uint64_t pa, caddr_t new_va)
3025 {
3026 	tte_t tte;
3027 
3028 	vtag_flushpage(new_va, (uint64_t)ksfmmup);
3029 	sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K);
3030 	tte.tte_intlo |= TTE_LCK_INT;
3031 	sfmmu_dtlb_ld_kva(new_va, &tte);
3032 }
3033 
3034 static void
3035 mc_unlock_va(caddr_t va)
3036 {
3037 	vtag_flushpage(va, (uint64_t)ksfmmup);
3038 }
3039 
3040 /* ARGSUSED */
3041 int
3042 mc_inject_error(int error_type, uint64_t pa, uint32_t flags)
3043 {
3044 	mc_opl_t *mcp;
3045 	int bank;
3046 	uint32_t dimm_addr;
3047 	uint32_t cntl;
3048 	mc_rsaddr_info_t rsaddr;
3049 	uint32_t data, stat;
3050 	int both_sides = 0;
3051 	uint64_t pa0;
3052 	int extra_injection_needed = 0;
3053 	extern void cpu_flush_ecache(void);
3054 
3055 	MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags);
3056 
3057 	mutex_enter(&mcmutex);
3058 	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
3059 		mutex_exit(&mcmutex);
3060 		MC_LOG("mc_inject_error: invalid pa\n");
3061 		return (ENOTSUP);
3062 	}
3063 
3064 	mutex_enter(&mcp->mc_lock);
3065 	mutex_exit(&mcmutex);
3066 
3067 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
3068 		mutex_exit(&mcp->mc_lock);
3069 		MC_LOG("mc-opl has been suspended.  No error injection.\n");
3070 		return (EBUSY);
3071 	}
3072 
3073 	/* convert pa to offset within the board */
3074 	MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address);
3075 
3076 	if (!pa_is_valid(mcp, pa)) {
3077 		mutex_exit(&mcp->mc_lock);
3078 		return (EINVAL);
3079 	}
3080 
3081 	pa0 = pa - mcp->mc_start_address;
3082 
3083 	bank = pa_to_bank(mcp, pa0);
3084 
3085 	if (flags & MC_INJECT_FLAG_OTHER)
3086 		bank = bank ^ 1;
3087 
3088 	if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) {
3089 		mutex_exit(&mcp->mc_lock);
3090 		MC_LOG("Not mirror mode\n");
3091 		return (EINVAL);
3092 	}
3093 
3094 	dimm_addr = pa_to_dimm(mcp, pa0);
3095 
3096 	MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank,
3097 	    dimm_addr);
3098 
3099 
3100 	switch (error_type) {
3101 	case MC_INJECT_INTERMITTENT_MCE:
3102 	case MC_INJECT_PERMANENT_MCE:
3103 	case MC_INJECT_MUE:
3104 		both_sides = 1;
3105 	}
3106 
3107 	if (flags & MC_INJECT_FLAG_RESET)
3108 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0);
3109 
3110 	ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK);
3111 
3112 	if (both_sides) {
3113 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0);
3114 		ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr &
3115 		    MAC_EG_ADD_MASK);
3116 	}
3117 
3118 	switch (error_type) {
3119 	case MC_INJECT_SUE:
3120 		extra_injection_needed = 1;
3121 		/*FALLTHROUGH*/
3122 	case MC_INJECT_UE:
3123 	case MC_INJECT_MUE:
3124 		if (flags & MC_INJECT_FLAG_PATH) {
3125 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3126 			    MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE;
3127 		} else {
3128 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 |
3129 			    MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE;
3130 		}
3131 		flags |= MC_INJECT_FLAG_ST;
3132 		break;
3133 	case MC_INJECT_INTERMITTENT_CE:
3134 	case MC_INJECT_INTERMITTENT_MCE:
3135 		if (flags & MC_INJECT_FLAG_PATH) {
3136 			cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 |
3137 			    MAC_EG_RDERR_ONCE;
3138 		} else {
3139 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3140 			    MAC_EG_DERR_ONCE;
3141 		}
3142 		extra_injection_needed = 1;
3143 		flags |= MC_INJECT_FLAG_ST;
3144 		break;
3145 	case MC_INJECT_PERMANENT_CE:
3146 	case MC_INJECT_PERMANENT_MCE:
3147 		if (flags & MC_INJECT_FLAG_PATH) {
3148 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3149 			    MAC_EG_RDERR_ALWAYS;
3150 		} else {
3151 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3152 			    MAC_EG_DERR_ALWAYS;
3153 		}
3154 		flags |= MC_INJECT_FLAG_ST;
3155 		break;
3156 	case MC_INJECT_CMPE:
3157 		data = 0xabcdefab;
3158 		stphys(pa, data);
3159 		cpu_flush_ecache();
3160 		MC_LOG("CMPE: writing data %x to %lx\n", data, pa);
3161 		ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE);
3162 		stphys(pa, data ^ 0xffffffff);
3163 		membar_sync();
3164 		cpu_flush_ecache();
3165 		ST_MAC_REG(MAC_MIRR(mcp, bank), 0);
3166 		MC_LOG("CMPE: write new data %xto %lx\n", data, pa);
3167 		cntl = 0;
3168 		break;
3169 	case MC_INJECT_NOP:
3170 		cntl = 0;
3171 		break;
3172 	default:
3173 		MC_LOG("mc_inject_error: invalid option\n");
3174 		cntl = 0;
3175 	}
3176 
3177 	if (cntl) {
3178 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK);
3179 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3180 
3181 		if (both_sides) {
3182 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3183 			    MAC_EG_SETUP_MASK);
3184 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3185 		}
3186 	}
3187 
3188 	/*
3189 	 * For all injection cases except compare error, we
3190 	 * must write to the PA to trigger the error.
3191 	 */
3192 
3193 	if (flags & MC_INJECT_FLAG_ST) {
3194 		data = 0xf0e0d0c0;
3195 		MC_LOG("Writing %x to %lx\n", data, pa);
3196 		stphys(pa, data);
3197 		cpu_flush_ecache();
3198 	}
3199 
3200 
3201 	if (flags & MC_INJECT_FLAG_LD) {
3202 		if (flags & MC_INJECT_FLAG_PREFETCH) {
3203 			/*
3204 			 * Use strong prefetch operation to
3205 			 * inject MI errors.
3206 			 */
3207 			page_t *pp;
3208 			extern void mc_prefetch(caddr_t);
3209 
3210 			MC_LOG("prefetch\n");
3211 
3212 			pp = page_numtopp_nolock(pa >> PAGESHIFT);
3213 			if (pp != NULL) {
3214 				caddr_t	va, va1;
3215 
3216 				va = ppmapin(pp, PROT_READ|PROT_WRITE,
3217 				    (caddr_t)-1);
3218 				kpreempt_disable();
3219 				mc_lock_va((uint64_t)pa, va);
3220 				va1 = va + (pa & (PAGESIZE - 1));
3221 				mc_prefetch(va1);
3222 				mc_unlock_va(va);
3223 				kpreempt_enable();
3224 				ppmapout(va);
3225 
3226 				/*
3227 				 * For MI errors, we need one extra
3228 				 * injection for HW patrol to stop.
3229 				 */
3230 				extra_injection_needed = 1;
3231 			} else {
3232 				cmn_err(CE_WARN, "Cannot find page structure"
3233 				    " for PA %lx\n", pa);
3234 			}
3235 		} else {
3236 			MC_LOG("Reading from %lx\n", pa);
3237 			data = ldphys(pa);
3238 			MC_LOG("data = %x\n", data);
3239 		}
3240 
3241 		if (extra_injection_needed) {
3242 			/*
3243 			 * These are the injection cases where the
3244 			 * requested injected errors will not cause the HW
3245 			 * patrol to stop. For these cases, we need to inject
3246 			 * an extra 'real' PTRL error to force the
3247 			 * HW patrol to stop so that we can report the
3248 			 * errors injected. Note that we cannot read
3249 			 * and report error status while the HW patrol
3250 			 * is running.
3251 			 */
3252 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank),
3253 			    cntl & MAC_EG_SETUP_MASK);
3254 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3255 
3256 			if (both_sides) {
3257 				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3258 				    MAC_EG_SETUP_MASK);
3259 				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3260 			}
3261 			data = 0xf0e0d0c0;
3262 			MC_LOG("Writing %x to %lx\n", data, pa);
3263 			stphys(pa, data);
3264 			cpu_flush_ecache();
3265 		}
3266 	}
3267 
3268 	if (flags & MC_INJECT_FLAG_RESTART) {
3269 		MC_LOG("Restart patrol\n");
3270 		rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num;
3271 		rsaddr.mi_restartaddr.ma_bank = bank;
3272 		rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr;
3273 		rsaddr.mi_valid = 1;
3274 		rsaddr.mi_injectrestart = 1;
3275 		restart_patrol(mcp, bank, &rsaddr);
3276 	}
3277 
3278 	if (flags & MC_INJECT_FLAG_POLL) {
3279 		int running;
3280 		int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
3281 
3282 		MC_LOG("Poll patrol error\n");
3283 		stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank));
3284 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
3285 		running = cntl & MAC_CNTL_PTRL_START;
3286 
3287 		if (!running &&
3288 		    (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) {
3289 			/*
3290 			 * HW patrol stopped and we have errors to
3291 			 * report. Do it.
3292 			 */
3293 			mcp->mc_speedup_period[ebank] = 0;
3294 			rsaddr.mi_valid = 0;
3295 			rsaddr.mi_injectrestart = 0;
3296 			if (IS_MIRROR(mcp, bank)) {
3297 				mc_error_handler_mir(mcp, bank, &rsaddr);
3298 			} else {
3299 				mc_error_handler(mcp, bank, &rsaddr);
3300 			}
3301 
3302 			restart_patrol(mcp, bank, &rsaddr);
3303 		} else {
3304 			/*
3305 			 * We are expecting to report injected
3306 			 * errors but the HW patrol is still running.
3307 			 * Speed up the scanning
3308 			 */
3309 			mcp->mc_speedup_period[ebank] = 2;
3310 			MAC_CMD(mcp, bank, 0);
3311 			restart_patrol(mcp, bank, NULL);
3312 		}
3313 	}
3314 
3315 	mutex_exit(&mcp->mc_lock);
3316 	return (0);
3317 }
3318 
/*
 * mc_stphysio -- store the 32-bit value 'data' at the I/O physical
 * address 'pa' and then read the same address back.  Both the store
 * and the value read back are reported through MC_LOG.
 */
void
mc_stphysio(uint64_t pa, uint32_t data)
{
	uint32_t readback;

	MC_LOG("0x%x -> pa(%lx)\n", data, pa);
	stphysio(pa, data);

	/*
	 * Read the location back so that the store above is forced to
	 * be processed by the mac patrol; the value is only logged.
	 */
	readback = ldphysio(pa);
	MC_LOG("pa(%lx) = 0x%x\n", pa, readback);
}
3329 
/*
 * mc_ldphysio -- load the 32-bit value at the I/O physical address 'pa',
 * report it through MC_LOG and return it to the caller.
 */
uint32_t
mc_ldphysio(uint64_t pa)
{
	uint32_t value = ldphysio(pa);

	MC_LOG("pa(%lx) = 0x%x\n", pa, value);
	return (value);
}
3339 
/* Local ASCII-only digit test; evaluates its argument twice. */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

/*
 * parse_unum_memory -- pull the board number and the three character
 * DIMM name out of a unum string.
 *
 * Two layouts are recognised:
 *
 *   - a unum containing "CMU" (DC model): the number following "CMU"
 *     is the board, and the "MEM" that follows must be trailed by two
 *     digits and an 'A' or 'B'; those three characters form the name.
 *
 *   - a unum containing "MBU_" (FF1/FF2 model): "MBU_" must be followed
 *     by 'A' or 'B'; the number following "MEMB" divided by 4 is the
 *     board and its first character is the first character of the name;
 *     the later "MEM" must be trailed by one digit and an 'A' or 'B',
 *     which supply the remaining two characters.
 *
 * On success *board is set, dname receives the NUL-terminated name
 * (the caller must supply at least 4 bytes) and 0 is returned.  On
 * failure a value in the range 1..10 identifying the failed check is
 * returned; *board may already have been written in that case.
 */
int
parse_unum_memory(char *unum, int *board, char *dname)
{
	char *p;
	char first, second, third;

	p = strstr(unum, "CMU");
	if (p != NULL) {
		/* DC Model */
		p += 3;
		*board = (uint8_t)stoi(&p);

		p = strstr(p, "MEM");
		if (p == NULL)
			return (1);
		p += 3;

		if (strlen(p) < 3)
			return (2);
		if (!isdigit(p[0]) || !isdigit(p[1]) ||
		    (p[2] != 'A' && p[2] != 'B'))
			return (3);

		first = p[0];
		second = p[1];
		third = p[2];
	} else {
		p = strstr(unum, "MBU_");
		if (p == NULL)
			return (9);

		/* FF1/FF2 Model */
		p += 4;
		if (p[0] != 'A' && p[0] != 'B')
			return (4);

		p = strstr(p, "MEMB");
		if (p == NULL)
			return (5);
		p += 4;

		/* Remember the leading character before stoi() advances p. */
		first = p[0];
		*board = ((uint8_t)stoi(&p)) / 4;

		p = strstr(p, "MEM");
		if (p == NULL)
			return (6);
		p += 3;

		if (strlen(p) < 2)
			return (7);
		if (!isdigit(p[0]) || (p[1] != 'A' && p[1] != 'B'))
			return (8);

		second = p[0];
		third = p[1];
	}

	if (*board < 0)
		return (10);

	dname[0] = first;
	dname[1] = second;
	dname[2] = third;
	dname[3] = '\0';
	return (0);
}
3409 
3410 /*
3411  * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and
3412  * the DIMM name.
3413  */
3414 int
3415 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
3416     int buflen, int *lenp)
3417 {
3418 	int		ret = ENODEV;
3419 	mc_dimm_info_t	*d = NULL;
3420 
3421 	if ((d = mcp->mc_dimm_list) == NULL)
3422 		return (ENOTSUP);
3423 
3424 	for (; d != NULL; d = d->md_next) {
3425 		if (strcmp(d->md_dimmname, dname) == 0) {
3426 			break;
3427 		}
3428 	}
3429 	if (d != NULL) {
3430 		*lenp = strlen(d->md_serial) + strlen(d->md_partnum);
3431 		if (buflen <=  *lenp) {
3432 			cmn_err(CE_WARN, "mc_get_mem_sid_dimm: "
3433 			    "buflen is smaller than %d\n", *lenp);
3434 			ret = ENOSPC;
3435 		} else {
3436 			snprintf(buf, buflen, "%s:%s",
3437 			    d->md_serial, d->md_partnum);
3438 			ret = 0;
3439 		}
3440 	}
3441 	MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n",
3442 	    ret, dname, (ret == 0) ? buf : "");
3443 	return (ret);
3444 }
3445 
3446 int
3447 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb,
3448     int bank, uint32_t mf_type, uint32_t d_slot)
3449 {
3450 	int	lenp = buflen;
3451 	int	id;
3452 	int	ret;
3453 	char	*dimmnm;
3454 
3455 	if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
3456 	    mf_type == FLT_TYPE_PERMANENT_CE) {
3457 		if (plat_model == MODEL_DC) {
3458 			id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
3459 			dimmnm = mc_dc_dimm_unum_table[id];
3460 		} else {
3461 			id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
3462 			dimmnm = mc_ff_dimm_unum_table[id];
3463 		}
3464 		if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen,
3465 		    &lenp)) != 0) {
3466 			return (ret);
3467 		}
3468 	} else {
3469 		return (1);
3470 	}
3471 
3472 	return (0);
3473 }
3474 
3475 /*
3476  * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum.
3477  */
3478 int
3479 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3480 {
3481 	int	i;
3482 	int	ret = ENODEV;
3483 	int	board;
3484 	char	dname[MCOPL_MAX_DIMMNAME + 1];
3485 	mc_opl_t *mcp;
3486 
3487 	MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen);
3488 	if ((ret = parse_unum_memory(unum, &board, dname)) != 0) {
3489 		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3490 		    unum, ret);
3491 		return (EINVAL);
3492 	}
3493 
3494 	if (board < 0) {
3495 		MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n",
3496 		    board, dname);
3497 		return (EINVAL);
3498 	}
3499 
3500 	mutex_enter(&mcmutex);
3501 	/*
3502 	 * return ENOENT if we can not find the matching board.
3503 	 */
3504 	ret = ENOENT;
3505 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3506 		if ((mcp = mc_instances[i]) == NULL)
3507 			continue;
3508 		mutex_enter(&mcp->mc_lock);
3509 		if (mcp->mc_phys_board_num != board) {
3510 			mutex_exit(&mcp->mc_lock);
3511 			continue;
3512 		}
3513 		ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp);
3514 		if (ret == 0) {
3515 			mutex_exit(&mcp->mc_lock);
3516 			break;
3517 		}
3518 		mutex_exit(&mcp->mc_lock);
3519 	}
3520 	mutex_exit(&mcmutex);
3521 	return (ret);
3522 }
3523 
3524 /*
3525  * mc_get_mem_offset -- get the offset in a DIMM for a given physical address.
3526  */
3527 int
3528 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
3529 {
3530 	int		i;
3531 	int		ret = ENODEV;
3532 	mc_addr_t	maddr;
3533 	mc_opl_t	*mcp;
3534 
3535 	mutex_enter(&mcmutex);
3536 	for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) {
3537 		if ((mcp = mc_instances[i]) == NULL)
3538 			continue;
3539 		mutex_enter(&mcp->mc_lock);
3540 		if (!pa_is_valid(mcp, paddr)) {
3541 			mutex_exit(&mcp->mc_lock);
3542 			continue;
3543 		}
3544 		if (pa_to_maddr(mcp, paddr, &maddr) == 0) {
3545 			*offp = maddr.ma_dimm_addr;
3546 			ret = 0;
3547 		}
3548 		mutex_exit(&mcp->mc_lock);
3549 	}
3550 	mutex_exit(&mcmutex);
3551 	MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n",
3552 	    ret, paddr, *offp);
3553 	return (ret);
3554 }
3555 
3556 /*
3557  * dname_to_bankslot - Get the bank and slot number from the DIMM name.
3558  */
3559 int
3560 dname_to_bankslot(char *dname, int *bank, int *slot)
3561 {
3562 	int i;
3563 	int tsz;
3564 	char **tbl;
3565 
3566 	if (plat_model == MODEL_DC) { /* DC */
3567 		tbl = mc_dc_dimm_unum_table;
3568 		tsz = OPL_MAX_DIMMS;
3569 	} else {
3570 		tbl = mc_ff_dimm_unum_table;
3571 		tsz = 2 * OPL_MAX_DIMMS;
3572 	}
3573 
3574 	for (i = 0; i < tsz; i++) {
3575 		if (strcmp(dname,  tbl[i]) == 0) {
3576 			break;
3577 		}
3578 	}
3579 	if (i == tsz) {
3580 		return (1);
3581 	}
3582 	*bank = INDEX_TO_BANK(i);
3583 	*slot = INDEX_TO_SLOT(i);
3584 	return (0);
3585 }
3586 
3587 /*
3588  * mc_get_mem_addr -- get the physical address of a DIMM corresponding
3589  * to the unum and sid.
3590  */
3591 int
3592 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr)
3593 {
3594 	int	board;
3595 	int	bank;
3596 	int	slot;
3597 	int	i;
3598 	int	ret = ENODEV;
3599 	char	dname[MCOPL_MAX_DIMMNAME + 1];
3600 	mc_addr_t maddr;
3601 	mc_opl_t *mcp;
3602 
3603 	MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n",
3604 	    unum, sid, offset);
3605 	if (parse_unum_memory(unum, &board, dname) != 0) {
3606 		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3607 		    unum, ret);
3608 		return (EINVAL);
3609 	}
3610 
3611 	if (board < 0) {
3612 		MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n",
3613 		    board, dname);
3614 		return (EINVAL);
3615 	}
3616 
3617 	mutex_enter(&mcmutex);
3618 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3619 		if ((mcp = mc_instances[i]) == NULL)
3620 			continue;
3621 		mutex_enter(&mcp->mc_lock);
3622 		if (mcp->mc_phys_board_num != board) {
3623 			mutex_exit(&mcp->mc_lock);
3624 			continue;
3625 		}
3626 
3627 		ret = dname_to_bankslot(dname, &bank, &slot);
3628 		MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot);
3629 		if (ret != 0) {
3630 			MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n");
3631 			ret = ENODEV;
3632 		} else {
3633 			maddr.ma_bd = mcp->mc_board_num;
3634 			maddr.ma_bank =  bank;
3635 			maddr.ma_dimm_addr = offset;
3636 			ret = mcaddr_to_pa(mcp, &maddr, paddr);
3637 			if (ret != 0) {
3638 				MC_LOG("mc_get_mem_addr: "
3639 				    "mcaddr_to_pa failed\n");
3640 				ret = ENODEV;
3641 			}
3642 			mutex_exit(&mcp->mc_lock);
3643 			break;
3644 		}
3645 		mutex_exit(&mcp->mc_lock);
3646 	}
3647 	mutex_exit(&mcmutex);
3648 	MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr);
3649 	return (ret);
3650 }
3651 
3652 static void
3653 mc_free_dimm_list(mc_dimm_info_t *d)
3654 {
3655 	mc_dimm_info_t *next;
3656 
3657 	while (d != NULL) {
3658 		next = d->md_next;
3659 		kmem_free(d, sizeof (mc_dimm_info_t));
3660 		d = next;
3661 	}
3662 }
3663 
3664 /*
3665  * mc_get_dimm_list -- get the list of dimms with serial-id info
3666  * from the SP.
3667  */
3668 mc_dimm_info_t *
3669 mc_get_dimm_list(mc_opl_t *mcp)
3670 {
3671 	uint32_t	bufsz;
3672 	uint32_t	maxbufsz;
3673 	int		ret;
3674 	int		sexp;
3675 	board_dimm_info_t *bd_dimmp;
3676 	mc_dimm_info_t	*dimm_list = NULL;
3677 
3678 	maxbufsz = bufsz = sizeof (board_dimm_info_t) +
3679 	    ((MCOPL_MAX_DIMMNAME +  MCOPL_MAX_SERIAL +
3680 	    MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS);
3681 
3682 	bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP);
3683 	ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz);
3684 
3685 	MC_LOG("mc_get_dimm_list:  scf_service_getinfo returned=%d\n", ret);
3686 	if (ret == 0) {
3687 		sexp = sizeof (board_dimm_info_t) +
3688 		    ((bd_dimmp->bd_dnamesz +  bd_dimmp->bd_serialsz +
3689 		    bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms);
3690 
3691 		if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) &&
3692 		    (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) &&
3693 		    (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) &&
3694 		    (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) &&
3695 		    (sexp <= bufsz)) {
3696 
3697 #ifdef DEBUG
3698 			if (oplmc_debug)
3699 				mc_dump_dimm_info(bd_dimmp);
3700 #endif
3701 			dimm_list = mc_prepare_dimmlist(bd_dimmp);
3702 
3703 		} else {
3704 			cmn_err(CE_WARN, "DIMM info version mismatch\n");
3705 		}
3706 	}
3707 	kmem_free(bd_dimmp, maxbufsz);
3708 	MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list);
3709 	return (dimm_list);
3710 }
3711 
3712 /*
3713  * mc_prepare_dimmlist - Prepare the dimm list from the information
3714  * received from the SP.
3715  */
3716 mc_dimm_info_t *
3717 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp)
3718 {
3719 	char	*dimm_name;
3720 	char	*serial;
3721 	char	*part;
3722 	int	dimm;
3723 	int	dnamesz = bd_dimmp->bd_dnamesz;
3724 	int	sersz = bd_dimmp->bd_serialsz;
3725 	int	partsz = bd_dimmp->bd_partnumsz;
3726 	mc_dimm_info_t	*dimm_list = NULL;
3727 	mc_dimm_info_t	*d;
3728 
3729 	dimm_name = (char *)(bd_dimmp + 1);
3730 	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
3731 
3732 		d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t),
3733 		    KM_SLEEP);
3734 
3735 		bcopy(dimm_name, d->md_dimmname, dnamesz);
3736 		d->md_dimmname[dnamesz] = 0;
3737 
3738 		serial = dimm_name + dnamesz;
3739 		bcopy(serial, d->md_serial, sersz);
3740 		d->md_serial[sersz] = 0;
3741 
3742 		part = serial + sersz;
3743 		bcopy(part, d->md_partnum, partsz);
3744 		d->md_partnum[partsz] = 0;
3745 
3746 		d->md_next = dimm_list;
3747 		dimm_list = d;
3748 		dimm_name = part + partsz;
3749 	}
3750 	return (dimm_list);
3751 }
3752 
3753 #ifdef DEBUG
3754 void
3755 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz)
3756 {
3757 	char dname[MCOPL_MAX_DIMMNAME + 1];
3758 	char serial[MCOPL_MAX_SERIAL + 1];
3759 	char part[ MCOPL_MAX_PARTNUM + 1];
3760 	char *b;
3761 
3762 	b = buf;
3763 	bcopy(b, dname, dnamesz);
3764 	dname[dnamesz] = 0;
3765 
3766 	b += dnamesz;
3767 	bcopy(b, serial, serialsz);
3768 	serial[serialsz] = 0;
3769 
3770 	b += serialsz;
3771 	bcopy(b, part, partnumsz);
3772 	part[partnumsz] = 0;
3773 
3774 	printf("DIMM=%s  Serial=%s PartNum=%s\n", dname, serial, part);
3775 }
3776 
3777 void
3778 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp)
3779 {
3780 	int	dimm;
3781 	int	dnamesz = bd_dimmp->bd_dnamesz;
3782 	int	sersz = bd_dimmp->bd_serialsz;
3783 	int	partsz = bd_dimmp->bd_partnumsz;
3784 	char	*buf;
3785 
3786 	printf("Version=%d Board=%02d DIMMs=%d NameSize=%d "
3787 	    "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version,
3788 	    bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz,
3789 	    bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz);
3790 	printf("======================================================\n");
3791 
3792 	buf = (char *)(bd_dimmp + 1);
3793 	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
3794 		mc_dump_dimm(buf, dnamesz, sersz, partsz);
3795 		buf += dnamesz + sersz + partsz;
3796 	}
3797 	printf("======================================================\n");
3798 }
3799 
3800 
3801 /* ARGSUSED */
3802 static int
3803 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
3804 	int *rvalp)
3805 {
3806 	caddr_t	buf;
3807 	uint64_t pa;
3808 	int rv = 0;
3809 	int i;
3810 	uint32_t flags;
3811 	static uint32_t offset = 0;
3812 
3813 
3814 	flags = (cmd >> 4) & 0xfffffff;
3815 
3816 	cmd &= 0xf;
3817 
3818 	MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags);
3819 
3820 	if (arg != NULL) {
3821 		if (ddi_copyin((const void *)arg, (void *)&pa,
3822 		    sizeof (uint64_t), 0) < 0) {
3823 			rv = EFAULT;
3824 			return (rv);
3825 		}
3826 		buf = NULL;
3827 	} else {
3828 		buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP);
3829 
3830 		pa = va_to_pa(buf);
3831 		pa += offset;
3832 
3833 		offset += 64;
3834 		if (offset >= PAGESIZE)
3835 			offset = 0;
3836 	}
3837 
3838 	switch (cmd) {
3839 	case MCI_CE:
3840 		mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags);
3841 		break;
3842 	case MCI_PERM_CE:
3843 		mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags);
3844 		break;
3845 	case MCI_UE:
3846 		mc_inject_error(MC_INJECT_UE, pa, flags);
3847 		break;
3848 	case MCI_M_CE:
3849 		mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags);
3850 		break;
3851 	case MCI_M_PCE:
3852 		mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags);
3853 		break;
3854 	case MCI_M_UE:
3855 		mc_inject_error(MC_INJECT_MUE, pa, flags);
3856 		break;
3857 	case MCI_CMP:
3858 		mc_inject_error(MC_INJECT_CMPE, pa, flags);
3859 		break;
3860 	case MCI_NOP:
3861 		mc_inject_error(MC_INJECT_NOP, pa, flags); break;
3862 	case MCI_SHOW_ALL:
3863 		mc_debug_show_all = 1;
3864 		break;
3865 	case MCI_SHOW_NONE:
3866 		mc_debug_show_all = 0;
3867 		break;
3868 	case MCI_ALLOC:
3869 		/*
3870 		 * just allocate some kernel memory and never free it
3871 		 * 512 MB seems to be the maximum size supported.
3872 		 */
3873 		cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512);
3874 		for (i = 0; i < flags; i++) {
3875 			buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP);
3876 			cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n",
3877 			    (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf));
3878 		}
3879 		break;
3880 	case MCI_SUSPEND:
3881 		(void) opl_mc_suspend();
3882 		break;
3883 	case MCI_RESUME:
3884 		(void) opl_mc_resume();
3885 		break;
3886 	default:
3887 		rv = ENXIO;
3888 	}
3889 	return (rv);
3890 }
3891 
3892 #endif /* DEBUG */
3893