xref: /titanic_44/usr/src/uts/sun4u/opl/io/mc-opl.c (revision 3d9422220748313d64e24a04b64e12efcb070172)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2008
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/conf.h>
34 #include <sys/modctl.h>
35 #include <sys/stat.h>
36 #include <sys/async.h>
37 #include <sys/machcpuvar.h>
38 #include <sys/machsystm.h>
39 #include <sys/promif.h>
40 #include <sys/ksynch.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/sunndi.h>
44 #include <sys/ddifm.h>
45 #include <sys/fm/protocol.h>
46 #include <sys/fm/util.h>
47 #include <sys/kmem.h>
48 #include <sys/fm/io/opl_mc_fm.h>
49 #include <sys/memlist.h>
50 #include <sys/param.h>
51 #include <sys/disp.h>
52 #include <vm/page.h>
53 #include <sys/mc-opl.h>
54 #include <sys/opl.h>
55 #include <sys/opl_dimm.h>
56 #include <sys/scfd/scfostoescf.h>
57 #include <sys/cpu_module.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/vmem.h>
60 #include <vm/hat_sfmmu.h>
61 #include <sys/vmsystm.h>
62 #include <sys/membar.h>
63 
64 /*
65  * Function prototypes
66  */
67 static int mc_open(dev_t *, int, int, cred_t *);
68 static int mc_close(dev_t, int, int, cred_t *);
69 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
70 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
71 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
72 
73 static int mc_poll_init(void);
74 static void mc_poll_fini(void);
75 static int mc_board_add(mc_opl_t *mcp);
76 static int mc_board_del(mc_opl_t *mcp);
77 static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
78 static int mc_resume(mc_opl_t *mcp, uint32_t flag);
79 int opl_mc_suspend(void);
80 int opl_mc_resume(void);
81 
82 static void insert_mcp(mc_opl_t *mcp);
83 static void delete_mcp(mc_opl_t *mcp);
84 
85 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);
86 
87 static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa);
88 
89 int mc_get_mem_unum(int, uint64_t, char *, int, int *);
90 int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr);
91 int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
92 int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp);
93 int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
94     int buflen, int *lenp);
95 mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp);
96 mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp);
97 int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank,
98     uint32_t mf_type, uint32_t d_slot);
99 static void mc_free_dimm_list(mc_dimm_info_t *d);
100 static void mc_get_mlist(mc_opl_t *);
101 static void mc_polling(void);
102 static int mc_opl_get_physical_board(int);
103 
104 static void mc_clear_rewrite(mc_opl_t *mcp, int i);
105 static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state);
106 
107 #ifdef	DEBUG
108 static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *);
109 void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz);
110 void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp);
111 #endif
112 
113 #pragma weak opl_get_physical_board
114 extern int opl_get_physical_board(int);
115 extern int plat_max_boards(void);
116 
117 /*
118  * Configuration data structures
119  */
/*
 * Character/block entry points for this driver.  Only open, close and
 * ioctl are implemented in this file (mc_open, mc_close, mc_ioctl); the
 * remaining slots hold system-provided stubs or defaults.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};
140 
/*
 * Device operations vector.  It supplies mc_attach/mc_detach and the
 * mc_cb_ops table, and is referenced from modldrv.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev				/* power */
};
154 
155 /*
156  * Driver globals
157  */
158 
/*
 * Platform model.  _init() selects it from the PROM root node's "model"
 * property.  The enumerator values double as indices into model_names[],
 * so the two definitions must be kept in the same order.
 */
static enum {
	MODEL_FF1,
	MODEL_FF2,
	MODEL_DC,
	MODEL_IKKAKU
} plat_model = MODEL_DC;	/* The default behaviour is DC */

/*
 * Name fragments used by mc_set_mem_unum() when it formats a unum string;
 * indexed by plat_model.
 */
static struct plat_model_names {
	const char *unit_name;	/* leading unit component, e.g. "CMU" */
	const char *mem_name;	/* memory board prefix; "" where unused */
} model_names[] = {
	{ "MBU_A", "MEMB" },
	{ "MBU_B", "MEMB" },
	{ "CMU", "" },
	{ "MBU_A", "" }
};
175 
/*
 * The DIMM Names for DC platform.
 * The index into this table is made up of (bank, dslot),
 * Where dslot occupies bits 0-1 and bank occupies 2-4.
 * mc_set_mem_unum() builds that index with
 * BD_BK_SLOT_TO_INDEX(0, bank, slot).
 */
static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = {
	/* --------CMUnn----------- */
	/* --CS0-----|--CS1------ */
	/* -H-|--L-- | -H- | -L-- */
	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
	"13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */
	"23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */
	"33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */
	"01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */
	"11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */
	"21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */
	"31A", "30A", "31B", "30B"  /* Bank 7 (MAC 3 bank 1) */
};
194 
/*
 * The DIMM Names for FF1/FF2/IKKAKU platforms.
 * The index into this table is made up of (board, bank, dslot),
 * Where dslot occupies bits 0-1, bank occupies 2-4 and
 * board occupies the bit 5.
 * mc_set_mem_unum() builds that index with
 * BD_BK_SLOT_TO_INDEX(sb, bank, slot).  For FF1/FF2 it prints the first
 * character of an entry as the MEMB number and the remaining characters
 * as the DIMM name; for IKKAKU only the remaining characters are used.
 */
static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = {
	/* --------CMU0---------- */
	/* --CS0-----|--CS1------ */
	/* -H-|--L-- | -H- | -L-- */
	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
	"01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */
	"13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */
	"11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */
	"23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */
	"21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */
	"33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */
	"31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */
	/* --------CMU1---------- */
	/* --CS0-----|--CS1------ */
	/* -H-|--L-- | -H- | -L-- */
	"43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */
	"41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */
	"53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */
	"51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */
	"63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */
	"61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */
	"73A", "72A", "73B", "72B", /* Bank 6 (MAC 3 bank 0) */
	"71A", "70A", "71B", "70B"  /* Bank 7 (MAC 3 bank 1) */
};
225 
/*
 * Build an index into the DIMM unum tables from a board number, a bank
 * number and a DIMM slot: slot in bits 0-1, bank in bits 2-4, board in
 * bit 5.  Every argument is parenthesized so that an argument written
 * with a low-precedence operator (e.g. "a | b") is masked as a whole.
 */
#define	BD_BK_SLOT_TO_INDEX(bd, bk, s)			\
	((((bd) & 0x01) << 5) | (((bk) & 0x07) << 2) | ((s) & 0x03))

/* Recover the bank (bits 2-4) and the slot (bits 0-1) from such an index. */
#define	INDEX_TO_BANK(i)			(((i) & 0x1C) >> 2)
#define	INDEX_TO_SLOT(i)			((i) & 0x03)

/* Chip-select number of a DIMM slot: bit 1 of the slot number. */
#define	SLOT_TO_CS(slot)	(((slot) & 0x3) >> 1)
233 
/* Isolation unit size is 64 MB */
#define	MC_ISOLATION_BSIZE	(64 * 1024 * 1024)

/* Number of entries in mc_scan_speeds[]. */
#define	MC_MAX_SPEEDS 7

/*
 * One scan speed setting: mc_speeds is the speed code already shifted
 * into position by MC_CNTL_SPEED_SHIFT, mc_period is the period paired
 * with it (units are not visible in this part of the file).
 */
typedef struct {
	uint32_t mc_speeds;
	uint32_t mc_period;
} mc_scan_speed_t;

#define	MC_CNTL_SPEED_SHIFT 26

/*
 * In mirror mode, we normalized the bank idx to "even" since
 * the HW treats them as one unit w.r.t programming.
 * This bank index will be the "effective" bank index.
 * All mirrored bank state info on mc_period, mc_speedup_period
 * will be stored in the even bank structure to avoid code duplication.
 *
 * The argument is parenthesized so that an expression argument is
 * masked as a whole.
 */
#define	MIRROR_IDX(bankidx)	((bankidx) & ~1)

static mc_scan_speed_t	mc_scan_speeds[MC_MAX_SPEEDS] = {
	{0x6 << MC_CNTL_SPEED_SHIFT, 0},
	{0x5 << MC_CNTL_SPEED_SHIFT, 32},
	{0x4 << MC_CNTL_SPEED_SHIFT, 64},
	{0x3 << MC_CNTL_SPEED_SHIFT, 128},
	{0x2 << MC_CNTL_SPEED_SHIFT, 256},
	{0x1 << MC_CNTL_SPEED_SHIFT, 512},
	{0x0 << MC_CNTL_SPEED_SHIFT, 1024}
};

/* Same value as mc_scan_speeds[0].mc_speeds; uses the shared shift. */
static uint32_t	mc_max_speed = (0x6 << MC_CNTL_SPEED_SHIFT);
266 
/*
 * Driver-wide settings.  NOTE(review): most of these are consumed
 * outside this part of the file; only uses visible here are described.
 */
int mc_isolation_bsize = MC_ISOLATION_BSIZE;
/*
 * Patrol interval in seconds.  mc_attach() lets the
 * "mc-timeout-interval-sec" property override it and derives
 * mc_timeout_period from it.
 */
int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC;
int mc_max_scf_retry = 16;
int mc_max_scf_logs = 64;
int mc_max_errlog_processed = BANKNUM_PER_SB*2;
int mc_scan_period = 12 * 60 * 60;	/* 12 hours period */
int mc_max_rewrite_loop = 100;
int mc_rewrite_delay = 10;
/*
 * It takes SCF about 300 ms to process a request.  We can bail out
 * if it is busy.  It does not pay to wait for it too long.
 */
int mc_max_scf_loop = 2;
int mc_scf_delay = 100;
int mc_pce_dropped = 0;
/* Priority passed to thread_create() for the polling thread. */
int mc_poll_priority = MINCLSYSPRI;
int mc_max_rewrite_retry = 6 * 60;
284 
285 
286 /*
287  * Mutex hierarchy in mc-opl
288  * If both mcmutex and mc_lock must be held,
289  * mcmutex must be acquired first, and then mc_lock.
290  */
291 
static kmutex_t mcmutex;
mc_opl_t *mc_instances[OPL_MAX_BOARDS];

/*
 * Polling thread state.  mc_poll_cmd and mc_pollthr_running are read and
 * written with mc_polling_lock held.
 */
static kmutex_t mc_polling_lock;
static kcondvar_t mc_polling_cv;	/* polling thread sleeps on this */
static kcondvar_t mc_poll_exit_cv;	/* signalled when the thread exits */
static int mc_poll_cmd = 0;		/* MC_POLL_EXIT asks the thread to stop */
static int mc_pollthr_running = 0;	/* set by the thread while it runs */
/*
 * Sleep time between polls.  mc_attach() computes it with drv_usectohz()
 * and the polling thread adds it to ddi_get_lbolt(), so the unit is clock
 * ticks; 0 means it has not been computed yet.
 */
int mc_timeout_period = 0;
void *mc_statep;	/* soft state handle, set up in _init() */
302 
303 #ifdef	DEBUG
304 int oplmc_debug = 0;
305 #endif
306 
307 static int mc_debug_show_all = 0;
308 
309 extern struct mod_ops mod_driverops;
310 
/* Loadable-module description of this driver; points at mc_ops. */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"OPL Memory-controller %I%",	/* module name */
	&mc_ops,			/* driver ops */
};
316 
/* Module linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
322 
/*
 * Function-pointer hooks defined outside this module.  They are declared
 * weak so that _init()/_fini() can test whether each symbol exists
 * (by taking its address) before pointing it at, or clearing it from,
 * the mc_get_mem_* implementations of this driver.
 */
#pragma weak opl_get_mem_unum
#pragma weak opl_get_mem_sid
#pragma weak opl_get_mem_offset
#pragma weak opl_get_mem_addr

extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
    uint64_t *paddr);
333 
334 
335 /*
336  * pseudo-mc node portid format
337  *
338  *		[10]   = 0
339  *		[9]    = 1
340  *		[8]    = LSB_ID[4] = 0
341  *		[7:4]  = LSB_ID[3:0]
342  *		[3:0]  = 0
343  *
344  */
345 
346 /*
347  * These are the module initialization routines.
348  */
349 int
350 _init(void)
351 {
352 	int	error;
353 	int	plen;
354 	char	model[20];
355 	pnode_t	node;
356 
357 
358 	if ((error = ddi_soft_state_init(&mc_statep,
359 	    sizeof (mc_opl_t), 1)) != 0)
360 		return (error);
361 
362 	if ((error = mc_poll_init()) != 0) {
363 		ddi_soft_state_fini(&mc_statep);
364 		return (error);
365 	}
366 
367 	mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
368 	if (&opl_get_mem_unum)
369 		opl_get_mem_unum = mc_get_mem_unum;
370 	if (&opl_get_mem_sid)
371 		opl_get_mem_sid = mc_get_mem_sid;
372 	if (&opl_get_mem_offset)
373 		opl_get_mem_offset = mc_get_mem_offset;
374 	if (&opl_get_mem_addr)
375 		opl_get_mem_addr = mc_get_mem_addr;
376 
377 	node = prom_rootnode();
378 	plen = prom_getproplen(node, "model");
379 
380 	if (plen > 0 && plen < sizeof (model)) {
381 		(void) prom_getprop(node, "model", model);
382 		model[plen] = '\0';
383 		if (strcmp(model, "FF1") == 0)
384 			plat_model = MODEL_FF1;
385 		else if (strcmp(model, "FF2") == 0)
386 			plat_model = MODEL_FF2;
387 		else if (strncmp(model, "DC", 2) == 0)
388 			plat_model = MODEL_DC;
389 		else if (strcmp(model, "IKKAKU") == 0)
390 			plat_model = MODEL_IKKAKU;
391 	}
392 
393 	error =  mod_install(&modlinkage);
394 	if (error != 0) {
395 		if (&opl_get_mem_unum)
396 			opl_get_mem_unum = NULL;
397 		if (&opl_get_mem_sid)
398 			opl_get_mem_sid = NULL;
399 		if (&opl_get_mem_offset)
400 			opl_get_mem_offset = NULL;
401 		if (&opl_get_mem_addr)
402 			opl_get_mem_addr = NULL;
403 		mutex_destroy(&mcmutex);
404 		mc_poll_fini();
405 		ddi_soft_state_fini(&mc_statep);
406 	}
407 	return (error);
408 }
409 
410 int
411 _fini(void)
412 {
413 	int error;
414 
415 	if ((error = mod_remove(&modlinkage)) != 0)
416 		return (error);
417 
418 	if (&opl_get_mem_unum)
419 		opl_get_mem_unum = NULL;
420 	if (&opl_get_mem_sid)
421 		opl_get_mem_sid = NULL;
422 	if (&opl_get_mem_offset)
423 		opl_get_mem_offset = NULL;
424 	if (&opl_get_mem_addr)
425 		opl_get_mem_addr = NULL;
426 
427 	mutex_destroy(&mcmutex);
428 	mc_poll_fini();
429 	ddi_soft_state_fini(&mc_statep);
430 
431 	return (0);
432 }
433 
/*
 * Module information entry point: forwards to mod_info() with this
 * module's linkage and returns its result.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
439 
440 static void
441 mc_polling_thread()
442 {
443 	mutex_enter(&mc_polling_lock);
444 	mc_pollthr_running = 1;
445 	while (!(mc_poll_cmd & MC_POLL_EXIT)) {
446 		mc_polling();
447 		cv_timedwait(&mc_polling_cv, &mc_polling_lock,
448 		    ddi_get_lbolt() + mc_timeout_period);
449 	}
450 	mc_pollthr_running = 0;
451 
452 	/*
453 	 * signal if any one is waiting for this thread to exit.
454 	 */
455 	cv_signal(&mc_poll_exit_cv);
456 	mutex_exit(&mc_polling_lock);
457 	thread_exit();
458 	/* NOTREACHED */
459 }
460 
461 static int
462 mc_poll_init()
463 {
464 	mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL);
465 	cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL);
466 	cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL);
467 	return (0);
468 }
469 
470 static void
471 mc_poll_fini()
472 {
473 	mutex_enter(&mc_polling_lock);
474 	if (mc_pollthr_running) {
475 		mc_poll_cmd = MC_POLL_EXIT;
476 		cv_signal(&mc_polling_cv);
477 		while (mc_pollthr_running) {
478 			cv_wait(&mc_poll_exit_cv, &mc_polling_lock);
479 		}
480 	}
481 	mutex_exit(&mc_polling_lock);
482 	mutex_destroy(&mc_polling_lock);
483 	cv_destroy(&mc_polling_cv);
484 	cv_destroy(&mc_poll_exit_cv);
485 }
486 
487 static int
488 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
489 {
490 	mc_opl_t *mcp;
491 	int instance;
492 	int rv;
493 
494 	/* get the instance of this devi */
495 	instance = ddi_get_instance(devi);
496 
497 	switch (cmd) {
498 	case DDI_ATTACH:
499 		break;
500 	case DDI_RESUME:
501 		mcp = ddi_get_soft_state(mc_statep, instance);
502 		rv = mc_resume(mcp, MC_DRIVER_SUSPENDED);
503 		return (rv);
504 	default:
505 		return (DDI_FAILURE);
506 	}
507 
508 	if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS)
509 		return (DDI_FAILURE);
510 
511 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
512 		goto bad;
513 	}
514 
515 	if (mc_timeout_period == 0) {
516 		mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi,
517 		    DDI_PROP_DONTPASS, "mc-timeout-interval-sec",
518 		    mc_patrol_interval_sec);
519 		mc_timeout_period = drv_usectohz(1000000 *
520 		    mc_patrol_interval_sec / OPL_MAX_BOARDS);
521 	}
522 
523 	/* set informations in mc state */
524 	mcp->mc_dip = devi;
525 
526 	if (mc_board_add(mcp))
527 		goto bad;
528 
529 	insert_mcp(mcp);
530 
531 	/*
532 	 * Start the polling thread if it is not running already.
533 	 */
534 	mutex_enter(&mc_polling_lock);
535 	if (!mc_pollthr_running) {
536 		(void) thread_create(NULL, 0, (void (*)())mc_polling_thread,
537 		    NULL, 0, &p0, TS_RUN, mc_poll_priority);
538 	}
539 	mutex_exit(&mc_polling_lock);
540 	ddi_report_dev(devi);
541 
542 	return (DDI_SUCCESS);
543 
544 bad:
545 	ddi_soft_state_free(mc_statep, instance);
546 	return (DDI_FAILURE);
547 }
548 
549 /* ARGSUSED */
550 static int
551 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
552 {
553 	int rv;
554 	int instance;
555 	mc_opl_t *mcp;
556 
557 	/* get the instance of this devi */
558 	instance = ddi_get_instance(devi);
559 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
560 		return (DDI_FAILURE);
561 	}
562 
563 	switch (cmd) {
564 	case DDI_SUSPEND:
565 		rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED);
566 		return (rv);
567 	case DDI_DETACH:
568 		break;
569 	default:
570 		return (DDI_FAILURE);
571 	}
572 
573 	delete_mcp(mcp);
574 	if (mc_board_del(mcp) != DDI_SUCCESS) {
575 		return (DDI_FAILURE);
576 	}
577 
578 	/* free up the soft state */
579 	ddi_soft_state_free(mc_statep, instance);
580 
581 	return (DDI_SUCCESS);
582 }
583 
/*
 * open(9E) entry point.  No per-open work is done; every open succeeds.
 */
/* ARGSUSED */
static int
mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	return (0);
}
590 
/*
 * close(9E) entry point.  Nothing to release; always returns 0.
 */
/* ARGSUSED */
static int
mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
{
	return (0);
}
597 
/*
 * ioctl(9E) entry point.  DEBUG builds forward every request to
 * mc_ioctl_debug(); in other builds no ioctl is supported and ENXIO is
 * returned.
 */
/* ARGSUSED */
static int
mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
	int *rvalp)
{
#ifdef DEBUG
	return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp));
#else
	return (ENXIO);
#endif
}
609 
610 /*
611  * PA validity check:
612  * This function return 1 if the PA is a valid PA
613  * in the running Solaris instance i.e. in physinstall
614  * Otherwise, return 0.
615  */
616 
617 /* ARGSUSED */
618 static int
619 pa_is_valid(mc_opl_t *mcp, uint64_t addr)
620 {
621 	if (mcp->mlist == NULL)
622 		mc_get_mlist(mcp);
623 
624 	if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) {
625 		return (1);
626 	}
627 	return (0);
628 }
629 
630 /*
631  * mac-pa translation routines.
632  *
633  *    Input: mc driver state, (LSB#, Bank#, DIMM address)
634  *    Output: physical address
635  *
636  *    Valid   - return value:  0
637  *    Invalid - return value: -1
638  */
639 static int
640 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa)
641 {
642 	int i;
643 	uint64_t pa_offset = 0;
644 	int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1;
645 	int bank = maddr->ma_bank;
646 	mc_addr_t maddr1;
647 	int bank0, bank1;
648 
649 	MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
650 	    maddr->ma_dimm_addr);
651 
652 	/* loc validity check */
653 	ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd);
654 	ASSERT(bank >= 0 && OPL_BANK_MAX > bank);
655 
656 	/* Do translation */
657 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
658 		int pa_bit = 0;
659 		int mc_bit = mcp->mc_trans_table[cs][i];
660 		if (mc_bit < MC_ADDRESS_BITS) {
661 			pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1;
662 		} else if (mc_bit == MP_NONE) {
663 			pa_bit = 0;
664 		} else if (mc_bit == MP_BANK_0) {
665 			pa_bit = bank & 1;
666 		} else if (mc_bit == MP_BANK_1) {
667 			pa_bit = (bank >> 1) & 1;
668 		} else if (mc_bit == MP_BANK_2) {
669 			pa_bit = (bank >> 2) & 1;
670 		}
671 		pa_offset |= ((uint64_t)pa_bit) << i;
672 	}
673 	*pa = mcp->mc_start_address + pa_offset;
674 	MC_LOG("pa = %lx\n", *pa);
675 
676 	if (pa_to_maddr(mcp, *pa, &maddr1) == -1) {
677 		cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to "
678 		    "convert PA %lx\n", maddr->ma_bd, bank,
679 		    maddr->ma_dimm_addr, *pa);
680 		return (-1);
681 	}
682 
683 	/*
684 	 * In mirror mode, PA is always translated to the even bank.
685 	 */
686 	if (IS_MIRROR(mcp, maddr->ma_bank)) {
687 		bank0 = maddr->ma_bank & ~(1);
688 		bank1 = maddr1.ma_bank & ~(1);
689 	} else {
690 		bank0 = maddr->ma_bank;
691 		bank1 = maddr1.ma_bank;
692 	}
693 	/*
694 	 * there is no need to check ma_bd because it is generated from
695 	 * mcp.  They are the same.
696 	 */
697 	if ((bank0 == bank1) && (maddr->ma_dimm_addr ==
698 	    maddr1.ma_dimm_addr)) {
699 		return (0);
700 	} else {
701 		cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, "
702 		    "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
703 		    maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank,
704 		    maddr1.ma_dimm_addr);
705 		return (-1);
706 	}
707 }
708 
709 /*
710  * PA to CS (used by pa_to_maddr).
711  */
712 static int
713 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset)
714 {
715 	int i;
716 	int cs = 1;
717 
718 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
719 		/* MAC address bit<29> is arranged on the same PA bit */
720 		/* on both table. So we may use any table. */
721 		if (mcp->mc_trans_table[0][i] == CS_SHIFT) {
722 			cs = (pa_offset >> i) & 1;
723 			break;
724 		}
725 	}
726 	return (cs);
727 }
728 
729 /*
730  * PA to DIMM (used by pa_to_maddr).
731  */
732 /* ARGSUSED */
733 static uint32_t
734 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset)
735 {
736 	int i;
737 	int cs = pa_to_cs(mcp, pa_offset);
738 	uint32_t dimm_addr = 0;
739 
740 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
741 		int pa_bit_value = (pa_offset >> i) & 1;
742 		int mc_bit = mcp->mc_trans_table[cs][i];
743 		if (mc_bit < MC_ADDRESS_BITS) {
744 			dimm_addr |= pa_bit_value << mc_bit;
745 		}
746 	}
747 	dimm_addr |= cs << CS_SHIFT;
748 	return (dimm_addr);
749 }
750 
751 /*
752  * PA to Bank (used by pa_to_maddr).
753  */
754 static int
755 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset)
756 {
757 	int i;
758 	int cs = pa_to_cs(mcp, pa_offset);
759 	int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT];
760 
761 
762 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
763 		int pa_bit_value = (pa_offset >> i) & 1;
764 		int mc_bit = mcp->mc_trans_table[cs][i];
765 		switch (mc_bit) {
766 		case MP_BANK_0:
767 			bankno |= pa_bit_value;
768 			break;
769 		case MP_BANK_1:
770 			bankno |= pa_bit_value << 1;
771 			break;
772 		case MP_BANK_2:
773 			bankno |= pa_bit_value << 2;
774 			break;
775 		}
776 	}
777 
778 	return (bankno);
779 }
780 
781 /*
782  * PA to MAC address translation
783  *
784  *   Input: MAC driver state, physicall adress
785  *   Output: LSB#, Bank id, mac address
786  *
787  *    Valid   - return value:  0
788  *    Invalid - return value: -1
789  */
790 
791 int
792 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr)
793 {
794 	uint64_t pa_offset;
795 
796 	if (!mc_rangecheck_pa(mcp, pa))
797 		return (-1);
798 
799 	/* Do translation */
800 	pa_offset = pa - mcp->mc_start_address;
801 
802 	maddr->ma_bd = mcp->mc_board_num;
803 	maddr->ma_phys_bd = mcp->mc_phys_board_num;
804 	maddr->ma_bank = pa_to_bank(mcp, pa_offset);
805 	maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset);
806 	MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd,
807 	    maddr->ma_bank, maddr->ma_dimm_addr);
808 	return (0);
809 }
810 
811 /*
812  * UNUM format for DC is "/CMUnn/MEMxyZ", where
813  *	nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3.
814  *	x = MAC 0..3
815  *	y = 0..3 (slot info).
816  *	Z = 'A' or 'B'
817  *
818  * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where
819  *	x = 0..3 (MEMB number)
820  *	y = 0..3 (slot info).
821  *	Z = 'A' or 'B'
822  *
823  * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ", where
824  *	x = 0..7 (MEMB number)
825  *	y = 0..3 (slot info).
826  *	Z = 'A' or 'B'
827  *
828  * UNUM format for IKKAKU is "/MBU_A/MEMyZ", where
829  *	y = 0..3 (slot info).
830  *	Z = 'A' or 'B'
831  *
832  */
833 int
834 mc_set_mem_unum(char *buf, int buflen, int sb, int bank,
835     uint32_t mf_type, uint32_t d_slot)
836 {
837 	char *dimmnm;
838 	char memb_num;
839 	int cs;
840 	int i;
841 	int j;
842 
843 	cs = SLOT_TO_CS(d_slot);
844 
845 	switch (plat_model) {
846 	case MODEL_DC:
847 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
848 		    mf_type == FLT_TYPE_PERMANENT_CE) {
849 			i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
850 			dimmnm = mc_dc_dimm_unum_table[i];
851 			snprintf(buf, buflen, "/%s%02d/MEM%s",
852 			    model_names[plat_model].unit_name, sb, dimmnm);
853 		} else {
854 			i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
855 			j = (cs == 0) ?  i : i + 2;
856 			snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
857 			    model_names[plat_model].unit_name, sb,
858 			    mc_dc_dimm_unum_table[j],
859 			    mc_dc_dimm_unum_table[j + 1]);
860 		}
861 		break;
862 	case MODEL_FF1:
863 	case MODEL_FF2:
864 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
865 		    mf_type == FLT_TYPE_PERMANENT_CE) {
866 			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
867 			dimmnm = mc_ff_dimm_unum_table[i];
868 			memb_num = dimmnm[0];
869 			snprintf(buf, buflen, "/%s/%s%c/MEM%s",
870 			    model_names[plat_model].unit_name,
871 			    model_names[plat_model].mem_name,
872 			    memb_num, &dimmnm[1]);
873 		} else {
874 			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
875 			j = (cs == 0) ?  i : i + 2;
876 			memb_num = mc_ff_dimm_unum_table[i][0],
877 			    snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
878 			    model_names[plat_model].unit_name,
879 			    model_names[plat_model].mem_name, memb_num,
880 			    &mc_ff_dimm_unum_table[j][1],
881 			    &mc_ff_dimm_unum_table[j + 1][1]);
882 		}
883 		break;
884 	case MODEL_IKKAKU:
885 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
886 		    mf_type == FLT_TYPE_PERMANENT_CE) {
887 			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
888 			dimmnm = mc_ff_dimm_unum_table[i];
889 			snprintf(buf, buflen, "/%s/MEM%s",
890 			    model_names[plat_model].unit_name, &dimmnm[1]);
891 		} else {
892 			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
893 			j = (cs == 0) ?  i : i + 2;
894 			memb_num = mc_ff_dimm_unum_table[i][0],
895 			    snprintf(buf, buflen, "/%s/MEM%s MEM%s",
896 			    model_names[plat_model].unit_name,
897 			    &mc_ff_dimm_unum_table[j][1],
898 			    &mc_ff_dimm_unum_table[j + 1][1]);
899 		}
900 		break;
901 	default:
902 		return (-1);
903 	}
904 	return (0);
905 }
906 
907 static void
908 mc_ereport_post(mc_aflt_t *mc_aflt)
909 {
910 	char buf[FM_MAX_CLASS];
911 	char device_path[MAXPATHLEN];
912 	char sid[MAXPATHLEN];
913 	nv_alloc_t *nva = NULL;
914 	nvlist_t *ereport, *detector, *resource;
915 	errorq_elem_t *eqep;
916 	int nflts;
917 	mc_flt_stat_t *flt_stat;
918 	int i, n;
919 	int blen = MAXPATHLEN;
920 	char *p, *s = NULL;
921 	uint32_t values[2], synd[2], dslot[2];
922 	uint64_t offset = (uint64_t)-1;
923 	int ret = -1;
924 
925 	if (panicstr) {
926 		eqep = errorq_reserve(ereport_errorq);
927 		if (eqep == NULL)
928 			return;
929 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
930 		nva = errorq_elem_nva(ereport_errorq, eqep);
931 	} else {
932 		ereport = fm_nvlist_create(nva);
933 	}
934 
935 	/*
936 	 * Create the scheme "dev" FMRI.
937 	 */
938 	detector = fm_nvlist_create(nva);
939 	resource = fm_nvlist_create(nva);
940 
941 	nflts = mc_aflt->mflt_nflts;
942 
943 	ASSERT(nflts >= 1 && nflts <= 2);
944 
945 	flt_stat = mc_aflt->mflt_stat[0];
946 	(void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path);
947 	(void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL,
948 	    device_path, NULL);
949 
950 	/*
951 	 * Encode all the common data into the ereport.
952 	 */
953 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS,
954 	    mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS,
955 	    mc_aflt->mflt_erpt_class);
956 
957 	MC_LOG("mc_ereport_post: ereport %s\n", buf);
958 
959 
960 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
961 	    fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL);
962 
963 	/*
964 	 * Set payload.
965 	 */
966 	fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32,
967 	    flt_stat->mf_flt_maddr.ma_bd, NULL);
968 
969 	fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64,
970 	    flt_stat->mf_flt_paddr, NULL);
971 
972 	if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE ||
973 	    flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
974 		fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8,
975 		    ECC_STICKY, NULL);
976 	}
977 
978 	for (i = 0; i < nflts; i++)
979 		values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank;
980 
981 	fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts,
982 	    values, NULL);
983 
984 	for (i = 0; i < nflts; i++)
985 		values[i] = mc_aflt->mflt_stat[i]->mf_cntl;
986 
987 	fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts,
988 	    values, NULL);
989 
990 	for (i = 0; i < nflts; i++)
991 		values[i] = mc_aflt->mflt_stat[i]->mf_err_add;
992 
993 	/* offset is set only for PCE and ICE */
994 	if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE ||
995 	    mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) {
996 		offset = values[0];
997 
998 	}
999 	fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts,
1000 	    values, NULL);
1001 
1002 	for (i = 0; i < nflts; i++)
1003 		values[i] = mc_aflt->mflt_stat[i]->mf_err_log;
1004 
1005 	fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts,
1006 	    values, NULL);
1007 
1008 	for (i = 0; i < nflts; i++) {
1009 		flt_stat = mc_aflt->mflt_stat[i];
1010 		if (flt_stat->mf_errlog_valid) {
1011 			synd[i] = flt_stat->mf_synd;
1012 			dslot[i] = flt_stat->mf_dimm_slot;
1013 			values[i] = flt_stat->mf_dram_place;
1014 		} else {
1015 			synd[i] = 0;
1016 			dslot[i] = 0;
1017 			values[i] = 0;
1018 		}
1019 	}
1020 
1021 	fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts,
1022 	    synd, NULL);
1023 
1024 	fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY,
1025 	    nflts, dslot, NULL);
1026 
1027 	fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts,
1028 	    values, NULL);
1029 
1030 	device_path[0] = 0;
1031 	p = &device_path[0];
1032 	sid[0] = 0;
1033 	s = &sid[0];
1034 	ret = 0;
1035 
1036 	for (i = 0; i < nflts; i++) {
1037 		int bank;
1038 
1039 		flt_stat = mc_aflt->mflt_stat[i];
1040 		bank = flt_stat->mf_flt_maddr.ma_bank;
1041 		ret = mc_set_mem_unum(p + strlen(p), blen,
1042 		    flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type,
1043 		    flt_stat->mf_dimm_slot);
1044 
1045 		if (ret != 0) {
1046 			cmn_err(CE_WARN,
1047 			    "mc_ereport_post: Failed to determine the unum "
1048 			    "for board=%d bank=%d type=0x%x slot=0x%x",
1049 			    flt_stat->mf_flt_maddr.ma_bd, bank,
1050 			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1051 			continue;
1052 		}
1053 		n = strlen(device_path);
1054 		blen = MAXPATHLEN - n;
1055 		p = &device_path[n];
1056 		if (i < (nflts - 1)) {
1057 			snprintf(p, blen, " ");
1058 			blen--;
1059 			p++;
1060 		}
1061 
1062 		if (ret == 0) {
1063 			ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s),
1064 			    blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank,
1065 			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1066 
1067 		}
1068 	}
1069 
1070 	(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1071 	    device_path, (ret == 0) ? sid : NULL, (ret == 0) ? offset :
1072 	    (uint64_t)-1);
1073 
1074 	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource,
1075 	    NULL);
1076 
1077 	if (panicstr) {
1078 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1079 	} else {
1080 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1081 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1082 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1083 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1084 	}
1085 }
1086 
1087 
1088 static void
1089 mc_err_drain(mc_aflt_t *mc_aflt)
1090 {
1091 	int rv;
1092 	uint64_t pa = (uint64_t)(-1);
1093 	int i;
1094 
1095 	MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class);
1096 	/*
1097 	 * we come here only when we have:
1098 	 * In mirror mode: MUE, SUE
1099 	 * In normal mode: UE, Permanent CE, Intermittent CE
1100 	 */
1101 	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1102 		rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
1103 		    &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa);
1104 
1105 		/* Ensure the pa is valid (not in isolated memory block) */
1106 		if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa))
1107 			mc_aflt->mflt_stat[i]->mf_flt_paddr = pa;
1108 		else
1109 			mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1;
1110 	}
1111 
1112 	MC_LOG("mc_err_drain:pa = %lx\n", pa);
1113 
1114 	switch (page_retire_check(pa, NULL)) {
1115 	case 0:
1116 	case EAGAIN:
1117 		MC_LOG("Page retired or pending\n");
1118 		return;
1119 	case EIO:
1120 		/*
1121 		 * Do page retirement except for the PCE and ICE cases.
1122 		 * This is taken care by the OPL DE
1123 		 */
1124 		if (mc_aflt->mflt_stat[0]->mf_type !=
1125 		    FLT_TYPE_INTERMITTENT_CE &&
1126 		    mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) {
1127 			MC_LOG("offline page at pa %lx error %x\n", pa,
1128 			    mc_aflt->mflt_pr);
1129 			(void) page_retire(pa, mc_aflt->mflt_pr);
1130 		}
1131 		break;
1132 	case EINVAL:
1133 	default:
1134 		/*
1135 		 * Some memory do not have page structure so
1136 		 * we keep going in case of EINVAL.
1137 		 */
1138 		break;
1139 	}
1140 
1141 	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1142 		mc_aflt_t mc_aflt0;
1143 		if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) {
1144 			mc_aflt0 = *mc_aflt;
1145 			mc_aflt0.mflt_nflts = 1;
1146 			mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i];
1147 			mc_ereport_post(&mc_aflt0);
1148 		}
1149 	}
1150 }
1151 
1152 /*
1153  * The restart address is actually defined in unit of PA[37:6]
1154  * the mac patrol will convert that to dimm offset.  If the
1155  * address is not in the bank, it will continue to search for
1156  * the next PA that is within the bank.
1157  *
1158  * Also the mac patrol scans the dimms based on PA, not
1159  * dimm offset.
1160  */
1161 static int
1162 restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info)
1163 {
1164 	uint64_t pa;
1165 	int rv;
1166 
1167 	if (MC_REWRITE_MODE(mcp, bank)) {
1168 		return (0);
1169 	}
1170 	if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) {
1171 		MAC_PTRL_START(mcp, bank);
1172 		return (0);
1173 	}
1174 
1175 	rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa);
1176 	if (rv != 0) {
1177 		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
1178 		MAC_PTRL_START(mcp, bank);
1179 		return (0);
1180 	}
1181 
1182 	if (!mc_rangecheck_pa(mcp, pa)) {
1183 		/* pa is not on this board, just retry */
1184 		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
1185 		    "on board %d\n", pa, mcp->mc_board_num);
1186 		MAC_PTRL_START(mcp, bank);
1187 		return (0);
1188 	}
1189 
1190 	MC_LOG("restart_patrol: pa = %lx\n", pa);
1191 
1192 	if (!rsaddr_info->mi_injectrestart) {
1193 		/*
1194 		 * For non-error injection restart we need to
1195 		 * determine if the current restart pa/page is
1196 		 * a "good" page. A "good" page is a page that
1197 		 * has not been page retired. If the current
1198 		 * page that contains the pa is "good", we will
1199 		 * do a HW auto restart and let HW patrol continue
1200 		 * where it last stopped. Most desired scenario.
1201 		 *
1202 		 * If the current page is not "good", we will advance
1203 		 * to the next page to find the next "good" page and
1204 		 * restart the patrol from there.
1205 		 */
1206 		int wrapcount = 0;
1207 		uint64_t origpa = pa;
1208 		while (wrapcount < 2) {
1209 			if (!pa_is_valid(mcp, pa)) {
1210 			/*
1211 			 * Not in physinstall - advance to the
1212 			 * next memory isolation blocksize
1213 			 */
1214 			MC_LOG("Invalid PA\n");
1215 			pa = roundup(pa + 1, mc_isolation_bsize);
1216 			} else {
1217 			int rv;
1218 			if ((rv = page_retire_check(pa, NULL)) != 0 &&
1219 			    rv != EAGAIN) {
1220 					/*
1221 					 * The page is "good" (not retired),
1222 					 * we will use automatic HW restart
1223 					 * algorithm if this is the original
1224 					 * current starting page.
1225 					 */
1226 				if (pa == origpa) {
1227 					MC_LOG("Page has no error. "
1228 					    "Auto restart\n");
1229 					MAC_PTRL_START(mcp, bank);
1230 					return (0);
1231 				} else {
1232 					/*
1233 					 * found a subsequent good page
1234 					 */
1235 					break;
1236 				}
1237 			}
1238 
1239 			/*
1240 			 * Skip to the next page
1241 			 */
1242 			pa = roundup(pa + 1, PAGESIZE);
1243 			MC_LOG("Skipping bad page to %lx\n", pa);
1244 			}
1245 
1246 		    /* Check to see if we hit the end of the memory range */
1247 			if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
1248 			MC_LOG("Wrap around\n");
1249 			pa = mcp->mc_start_address;
1250 			wrapcount++;
1251 			}
1252 		}
1253 
1254 		if (wrapcount > 1) {
1255 			MC_LOG("Failed to find a good page. Just restart\n");
1256 			MAC_PTRL_START(mcp, bank);
1257 			return (0);
1258 		}
1259 	}
1260 
1261 	/*
1262 	 * We reached here either:
1263 	 * 1. We are doing an error injection restart that specify
1264 	 *    the exact pa/page to restart. OR
1265 	 * 2. We found a subsequent good page different from the
1266 	 *    original restart pa/page.
1267 	 * Restart MAC patrol: PA[37:6]
1268 	 */
1269 	MC_LOG("restart at pa = %lx\n", pa);
1270 	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
1271 	MAC_PTRL_START_ADD(mcp, bank);
1272 
1273 	return (0);
1274 }
1275 
1276 static void
1277 mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p)
1278 {
1279 	ASSERT(p != NULL);
1280 	p->ri_next = *q;
1281 	*q = p;
1282 }
1283 
1284 static mc_retry_info_t *
1285 mc_retry_info_get(mc_retry_info_t **q)
1286 {
1287 	mc_retry_info_t *p;
1288 
1289 	if ((p = *q) != NULL) {
1290 		*q = p->ri_next;
1291 		return (p);
1292 	} else {
1293 		return (NULL);
1294 	}
1295 }
1296 
1297 /*
1298  * Rewriting is used for two purposes.
1299  *  - to correct the error in memory.
1300  *  - to determine whether the error is permanent or intermittent.
1301  * It's done by writing the address in MAC_BANKm_REWRITE_ADD
1302  * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that,
1303  * REW_END (and REW_CE/REW_UE if some error detected) is set when
1304  * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM.
1305  *
1306  * Note that rewrite operation doesn't change RAW_UE to Marked UE.
1307  * Therefore, we use it only CE case.
1308  */
1309 
1310 static uint32_t
1311 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying)
1312 {
1313 	uint32_t cntl;
1314 	int count = 0;
1315 	int max_count;
1316 	int retry_state;
1317 
1318 	if (retrying)
1319 		max_count = 1;
1320 	else
1321 		max_count = mc_max_rewrite_loop;
1322 
1323 	retry_state = RETRY_STATE_PENDING;
1324 
1325 	if (!retrying && MC_REWRITE_MODE(mcp, bank)) {
1326 		goto timeout;
1327 	}
1328 
1329 	retry_state = RETRY_STATE_ACTIVE;
1330 
1331 	/* first wait to make sure PTRL_STATUS is 0 */
1332 	while (count++ < max_count) {
1333 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1334 		if (!(cntl & MAC_CNTL_PTRL_STATUS)) {
1335 			count = 0;
1336 			break;
1337 		}
1338 		drv_usecwait(mc_rewrite_delay);
1339 	}
1340 	if (count >= max_count)
1341 		goto timeout;
1342 
1343 	count = 0;
1344 
1345 	ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr);
1346 	MAC_REW_REQ(mcp, bank);
1347 
1348 	retry_state = RETRY_STATE_REWRITE;
1349 
1350 	do {
1351 		if (count++ > max_count) {
1352 			goto timeout;
1353 		} else {
1354 			drv_usecwait(mc_rewrite_delay);
1355 		}
1356 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1357 	/*
1358 	 * If there are other MEMORY or PCI activities, this
1359 	 * will be BUSY, else it should be set immediately
1360 	 */
1361 	} while (!(cntl & MAC_CNTL_REW_END));
1362 
1363 	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
1364 	return (cntl);
1365 timeout:
1366 	mc_set_rewrite(mcp, bank, dimm_addr, retry_state);
1367 
1368 	return (0);
1369 }
1370 
1371 void
1372 mc_clear_rewrite(mc_opl_t *mcp, int bank)
1373 {
1374 	struct mc_bank *bankp;
1375 	mc_retry_info_t *retry;
1376 	uint32_t rew_addr;
1377 
1378 	bankp = &(mcp->mc_bank[bank]);
1379 	retry = bankp->mcb_active;
1380 	bankp->mcb_active = NULL;
1381 	mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1382 
1383 again:
1384 	bankp->mcb_rewrite_count = 0;
1385 
1386 	while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) {
1387 		rew_addr = retry->ri_addr;
1388 		mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1389 		if (do_rewrite(mcp, bank, rew_addr, 1) == 0)
1390 			break;
1391 	}
1392 
1393 	/* we break out if no more pending rewrite or we got timeout again */
1394 
1395 	if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1396 		if (!IS_MIRROR(mcp, bank)) {
1397 			MC_CLEAR_REWRITE_MODE(mcp, bank);
1398 		} else {
1399 			int mbank = bank ^ 1;
1400 			bankp = &(mcp->mc_bank[mbank]);
1401 			if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1402 			MC_CLEAR_REWRITE_MODE(mcp, bank);
1403 			MC_CLEAR_REWRITE_MODE(mcp, mbank);
1404 			} else {
1405 			bank = mbank;
1406 			goto again;
1407 			}
1408 		}
1409 	}
1410 }
1411 
1412 void
1413 mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state)
1414 {
1415 	mc_retry_info_t *retry;
1416 	struct mc_bank *bankp;
1417 
1418 	bankp = &mcp->mc_bank[bank];
1419 
1420 	retry = mc_retry_info_get(&bankp->mcb_retry_freelist);
1421 
1422 	ASSERT(retry != NULL);
1423 
1424 	retry->ri_addr = addr;
1425 	retry->ri_state = state;
1426 
1427 	MC_SET_REWRITE_MODE(mcp, bank);
1428 
1429 	if ((state > RETRY_STATE_PENDING)) {
1430 		ASSERT(bankp->mcb_active == NULL);
1431 		bankp->mcb_active = retry;
1432 	} else {
1433 		mc_retry_info_put(&bankp->mcb_retry_pending, retry);
1434 	}
1435 
1436 	if (IS_MIRROR(mcp, bank)) {
1437 		int mbank = bank ^1;
1438 		MC_SET_REWRITE_MODE(mcp, mbank);
1439 	}
1440 }
1441 
1442 void
1443 mc_process_scf_log(mc_opl_t *mcp)
1444 {
1445 	int count;
1446 	int n = 0;
1447 	scf_log_t *p;
1448 	int bank;
1449 
1450 	for (bank = 0; bank < BANKNUM_PER_SB; bank++) {
1451 		while ((p = mcp->mc_scf_log[bank]) != NULL &&
1452 		    (n < mc_max_errlog_processed)) {
1453 		ASSERT(bank == p->sl_bank);
1454 		count = 0;
1455 		while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank))
1456 		    & MAC_STATIC_ERR_VLD)) {
1457 			if (count++ >= (mc_max_scf_loop)) {
1458 				break;
1459 			}
1460 			drv_usecwait(mc_scf_delay);
1461 		}
1462 
1463 		if (count < mc_max_scf_loop) {
1464 			ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank),
1465 			    p->sl_err_log);
1466 
1467 			ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank),
1468 			    p->sl_err_add|MAC_STATIC_ERR_VLD);
1469 			mcp->mc_scf_retry[bank] = 0;
1470 		} else {
1471 			/*
1472 			 * if we try too many times, just drop the req
1473 			 */
1474 			if (mcp->mc_scf_retry[bank]++ <=
1475 			    mc_max_scf_retry) {
1476 				return;
1477 			} else {
1478 				if ((++mc_pce_dropped & 0xff) == 0) {
1479 					cmn_err(CE_WARN, "Cannot "
1480 					    "report Permanent CE to "
1481 					    "SCF\n");
1482 				}
1483 			}
1484 		}
1485 		n++;
1486 		mcp->mc_scf_log[bank] = p->sl_next;
1487 		mcp->mc_scf_total[bank]--;
1488 		ASSERT(mcp->mc_scf_total[bank] >= 0);
1489 		kmem_free(p, sizeof (scf_log_t));
1490 		}
1491 	}
1492 }
1493 void
1494 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank)
1495 {
1496 	scf_log_t *p;
1497 
1498 	if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) {
1499 		if ((++mc_pce_dropped & 0xff) == 0) {
1500 			cmn_err(CE_WARN, "Too many Permanent CE requests.\n");
1501 		}
1502 		return;
1503 	}
1504 	p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP);
1505 	p->sl_next = 0;
1506 	p->sl_err_add = flt_stat->mf_err_add;
1507 	p->sl_err_log = flt_stat->mf_err_log;
1508 	p->sl_bank = bank;
1509 
1510 	if (mcp->mc_scf_log[bank] == NULL) {
1511 		/*
1512 		 * we rely on mc_scf_log to detect NULL queue.
1513 		 * mc_scf_log_tail is irrelevant is such case.
1514 		 */
1515 		mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p;
1516 	} else {
1517 		mcp->mc_scf_log_tail[bank]->sl_next = p;
1518 		mcp->mc_scf_log_tail[bank] = p;
1519 	}
1520 	mcp->mc_scf_total[bank]++;
1521 }
1522 /*
1523  * This routine determines what kind of CE happens, intermittent
1524  * or permanent as follows. (See 4.7.3 in Columbus2 PRM.)
1525  * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register.
1526  * - If CE is still detected on the same address even after doing
1527  *   rewrite operation twice, it is determined as permanent error.
1528  * - If error is not detected anymore, it is determined as intermittent
1529  *   error.
1530  * - If UE is detected due to rewrite operation, it should be treated
1531  *   as UE.
1532  */
1533 
1534 /* ARGSUSED */
1535 static void
1536 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error)
1537 {
1538 	uint32_t cntl;
1539 	int i;
1540 
1541 	flt_stat->mf_type = FLT_TYPE_PERMANENT_CE;
1542 	/*
1543 	 * rewrite request 1st time reads and correct error data
1544 	 * and write to DIMM.  2nd rewrite request must be issued
1545 	 * after REW_CE/UE/END is 0.  When the 2nd request is completed,
1546 	 * if REW_CE = 1, then it is permanent CE.
1547 	 */
1548 	for (i = 0; i < 2; i++) {
1549 		cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0);
1550 
1551 		if (cntl == 0) {
1552 			/* timeout case */
1553 			return;
1554 		}
1555 		/*
1556 		 * If the error becomes UE or CMPE
1557 		 * we return to the caller immediately.
1558 		 */
1559 		if (cntl & MAC_CNTL_REW_UE) {
1560 			if (ptrl_error)
1561 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE;
1562 			else
1563 				flt_stat->mf_cntl |= MAC_CNTL_MI_UE;
1564 			flt_stat->mf_type = FLT_TYPE_UE;
1565 			return;
1566 		}
1567 		if (cntl & MAC_CNTL_REW_CMPE) {
1568 			if (ptrl_error)
1569 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE;
1570 			else
1571 				flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE;
1572 			flt_stat->mf_type = FLT_TYPE_CMPE;
1573 			return;
1574 		}
1575 	}
1576 	if (!(cntl & MAC_CNTL_REW_CE)) {
1577 		flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE;
1578 	}
1579 
1580 	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1581 		/* report PERMANENT_CE to SP via SCF */
1582 		if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) {
1583 			mc_queue_scf_log(mcp, flt_stat, bank);
1584 		}
1585 	}
1586 }
1587 
/*
 * Error-bit tests on a MAC_PTRL_CNTL value.  "f" selects the bit set:
 * non-zero tests the patrol (PTRL) bits, zero tests the MI bits.
 * IS_OK() is true when none of the selected error bits is set.
 */
#define	IS_CMPE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE : MAC_CNTL_MI_CMPE))
#define	IS_UE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE))
#define	IS_CE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE))
#define	IS_OK(cntl, f)	\
	(((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : MAC_CNTL_MI_ERRS)) == 0)
1594 
1595 
1596 static int
1597 IS_CE_ONLY(uint32_t cntl, int ptrl_error)
1598 {
1599 	if (ptrl_error) {
1600 		return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE);
1601 	} else {
1602 		return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE);
1603 	}
1604 }
1605 
1606 void
1607 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value)
1608 {
1609 	int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
1610 
1611 	if (mcp->mc_speedup_period[ebank] > 0)
1612 		value |= mc_max_speed;
1613 	else
1614 		value |= mcp->mc_speed;
1615 	ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value);
1616 }
1617 
1618 static void
1619 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1620 {
1621 	flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1622 	    MAC_CNTL_PTRL_ERRS;
1623 	flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank));
1624 	flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank));
1625 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1626 	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1627 	flt_stat->mf_flt_maddr.ma_bank = bank;
1628 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1629 }
1630 
1631 static void
1632 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1633 {
1634 	uint32_t status, old_status;
1635 
1636 	status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS;
1637 	old_status = 0;
1638 
1639 	/* we keep reading until the status is stable */
1640 	while (old_status != status) {
1641 		old_status = status;
1642 		flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank));
1643 		flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank));
1644 		status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1645 		    MAC_CNTL_MI_ERRS;
1646 		if (status == old_status) {
1647 			break;
1648 		}
1649 	}
1650 
1651 	flt_stat->mf_cntl = status;
1652 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1653 	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1654 	flt_stat->mf_flt_maddr.ma_bank = bank;
1655 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1656 }
1657 
1658 
1659 /*
1660  * Error philosophy for mirror mode:
1661  *
1662  * PTRL (The error address for both banks are same, since ptrl stops if it
1663  * detects error.)
1664  * - Compare error  log CMPE.
1665  *
1666  * - UE-UE           Report MUE.  No rewrite.
1667  *
1668  * - UE-*	     UE-(CE/OK). Rewrite to scrub UE.  Report SUE.
1669  *
1670  * - CE-*            CE-(CE/OK). Scrub to determine if CE is permanent.
1671  *                   If CE is permanent, inform SCF.  Once for each
1672  *		     Dimm.  If CE becomes UE or CMPE, go back to above.
1673  *
1674  *
1675  * MI (The error addresses for each bank are the same or different.)
1676  * - Compare  error  If addresses are the same.  Just CMPE, so log CMPE.
1677  *		     If addresses are different (this could happen
1678  *		     as a result of scrubbing.  Report each separately.
1679  *		     Only report error info on each side.
1680  *
1681  * - UE-UE           Addresses are the same.  Report MUE.
1682  *		     Addresses are different.  Report SUE on each bank.
1683  *		     Rewrite to clear UE.
1684  *
1685  * - UE-*	     UE-(CE/OK)
1686  *		     Rewrite to clear UE.  Report SUE for the bank.
1687  *
1688  * - CE-*            CE-(CE/OK).  Scrub to determine if CE is permanent.
1689  *                   If CE becomes UE or CMPE, go back to above.
1690  *
1691  */
1692 
1693 static int
1694 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat)
1695 {
1696 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1697 	int i;
1698 	int rv = 0;
1699 	int bank;
1700 	int rewrite_timeout = 0;
1701 
1702 	MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n",
1703 	    flt_stat[0].mf_cntl, flt_stat[1].mf_cntl);
1704 
1705 	if (ptrl_error) {
1706 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1707 		    MAC_CNTL_PTRL_ERRS) == 0)
1708 			return (0);
1709 	} else {
1710 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1711 		    MAC_CNTL_MI_ERRS) == 0)
1712 			return (0);
1713 	}
1714 
1715 	/*
1716 	 * First we take care of the case of CE
1717 	 * because they can become UE or CMPE
1718 	 */
1719 	for (i = 0; i < 2; i++) {
1720 		if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) {
1721 			bank = flt_stat[i].mf_flt_maddr.ma_bank;
1722 			MC_LOG("CE detected on bank %d\n", bank);
1723 			mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error);
1724 			if (MC_REWRITE_ACTIVE(mcp, bank)) {
1725 				rewrite_timeout = 1;
1726 			}
1727 			rv = 1;
1728 		}
1729 	}
1730 
1731 	if (rewrite_timeout)
1732 		return (0);
1733 
1734 	/* The above scrubbing can turn CE into UE or CMPE */
1735 
1736 	/*
1737 	 * Now we distinguish two cases: same address or not
1738 	 * the same address.  It might seem more intuitive to
1739 	 * distinguish PTRL v.s. MI error but it is more
1740 	 * complicated that way.
1741 	 */
1742 
1743 	if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) {
1744 
1745 		if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) ||
1746 		    IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) {
1747 			flt_stat[0].mf_type = FLT_TYPE_CMPE;
1748 			flt_stat[1].mf_type = FLT_TYPE_CMPE;
1749 			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1750 			mc_aflt->mflt_nflts = 2;
1751 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1752 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1753 			mc_aflt->mflt_pr = PR_UE;
1754 			/*
1755 			 * Compare error is result of MAC internal error, so
1756 			 * simply log it instead of publishing an ereport. SCF
1757 			 * diagnoses all the MAC internal and its i/f error.
1758 			 */
1759 			MC_LOG("cmpe error detected\n");
1760 			return (1);
1761 		}
1762 
1763 		if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) &&
1764 		    IS_UE(flt_stat[1].mf_cntl, ptrl_error)) {
1765 			/* Both side are UE's */
1766 
1767 			MAC_SET_ERRLOG_INFO(&flt_stat[0]);
1768 			MAC_SET_ERRLOG_INFO(&flt_stat[1]);
1769 			MC_LOG("MUE detected\n");
1770 			flt_stat[0].mf_type = FLT_TYPE_MUE;
1771 			flt_stat[1].mf_type = FLT_TYPE_MUE;
1772 			mc_aflt->mflt_erpt_class = MC_OPL_MUE;
1773 			mc_aflt->mflt_nflts = 2;
1774 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1775 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1776 			mc_aflt->mflt_pr = PR_UE;
1777 			mc_err_drain(mc_aflt);
1778 			return (1);
1779 		}
1780 
1781 		/* Now the only case is UE/CE, UE/OK, or don't care */
1782 		for (i = 0; i < 2; i++) {
1783 			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1784 
1785 			/* rewrite can clear the one side UE error */
1786 
1787 			if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) {
1788 				(void) do_rewrite(mcp,
1789 				    flt_stat[i].mf_flt_maddr.ma_bank,
1790 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0);
1791 			}
1792 			flt_stat[i].mf_type = FLT_TYPE_UE;
1793 			MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1794 			mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1795 			mc_aflt->mflt_stat[0] = &flt_stat[i];
1796 			mc_aflt->mflt_nflts = 1;
1797 			mc_aflt->mflt_pr = PR_MCE;
1798 			mc_err_drain(mc_aflt);
1799 			/* Once we hit a UE/CE or UE/OK case, done */
1800 			return (1);
1801 			}
1802 		}
1803 
1804 	} else {
1805 		/*
1806 		 * addresses are different. That means errors
1807 		 * on the 2 banks are not related at all.
1808 		 */
1809 		for (i = 0; i < 2; i++) {
1810 			if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) {
1811 				flt_stat[i].mf_type = FLT_TYPE_CMPE;
1812 				mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1813 				mc_aflt->mflt_nflts = 1;
1814 				mc_aflt->mflt_stat[0] = &flt_stat[i];
1815 				mc_aflt->mflt_pr = PR_UE;
1816 				/*
1817 				 * Compare error is result of MAC internal
1818 				 * error, so simply log it instead of
1819 				 * publishing an ereport. SCF diagnoses all
1820 				 * the MAC internal and its interface error.
1821 				 */
1822 				MC_LOG("cmpe error detected\n");
1823 				/* no more report on this bank */
1824 				flt_stat[i].mf_cntl = 0;
1825 				rv = 1;
1826 			}
1827 		}
1828 
1829 		/* rewrite can clear the one side UE error */
1830 
1831 		for (i = 0; i < 2; i++) {
1832 			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1833 				(void) do_rewrite(mcp,
1834 				    flt_stat[i].mf_flt_maddr.ma_bank,
1835 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr,
1836 				    0);
1837 				flt_stat[i].mf_type = FLT_TYPE_UE;
1838 				MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1839 				mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1840 				mc_aflt->mflt_stat[0] = &flt_stat[i];
1841 				mc_aflt->mflt_nflts = 1;
1842 				mc_aflt->mflt_pr = PR_MCE;
1843 				mc_err_drain(mc_aflt);
1844 				rv = 1;
1845 			}
1846 		}
1847 	}
1848 	return (rv);
1849 }
1850 static void
1851 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
1852 {
1853 	mc_aflt_t mc_aflt;
1854 	mc_flt_stat_t flt_stat[2], mi_flt_stat[2];
1855 	int i;
1856 	int mi_valid;
1857 
1858 	ASSERT(rsaddr);
1859 
1860 	bzero(&mc_aflt, sizeof (mc_aflt_t));
1861 	bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t));
1862 	bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t));
1863 
1864 
1865 	mc_aflt.mflt_mcp = mcp;
1866 	mc_aflt.mflt_id = gethrtime();
1867 
1868 	/* Now read all the registers into flt_stat */
1869 
1870 	for (i = 0; i < 2; i++) {
1871 		MC_LOG("Reading registers of bank %d\n", bank);
1872 		/* patrol registers */
1873 		mc_read_ptrl_reg(mcp, bank, &flt_stat[i]);
1874 
1875 		/*
1876 		 * In mirror mode, it is possible that only one bank
1877 		 * may report the error. We need to check for it to
1878 		 * ensure we pick the right addr value for patrol restart.
1879 		 * Note that if both banks reported errors, we pick the
1880 		 * 2nd one. Both banks should reported the same error address.
1881 		 */
1882 		if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS)
1883 			rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr;
1884 
1885 		MC_LOG("ptrl registers cntl %x add %x log %x\n",
1886 		    flt_stat[i].mf_cntl, flt_stat[i].mf_err_add,
1887 		    flt_stat[i].mf_err_log);
1888 
1889 		/* MI registers */
1890 		mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]);
1891 
1892 		MC_LOG("MI registers cntl %x add %x log %x\n",
1893 		    mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add,
1894 		    mi_flt_stat[i].mf_err_log);
1895 
1896 		bank = bank^1;
1897 	}
1898 
1899 	/* clear errors once we read all the registers */
1900 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1901 
1902 	MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1903 
1904 	/* Process MI errors first */
1905 
1906 	/* if not error mode, cntl1 is 0 */
1907 	if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1908 	    (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1909 		mi_flt_stat[0].mf_cntl = 0;
1910 
1911 	if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1912 	    (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1913 		mi_flt_stat[1].mf_cntl = 0;
1914 
1915 	mc_aflt.mflt_is_ptrl = 0;
1916 	mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]);
1917 
1918 	if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1919 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl &
1920 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1921 	    (flt_stat[0].mf_err_add == mi_flt_stat[0].mf_err_add) &&
1922 	    (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1923 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl &
1924 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1925 	    (flt_stat[1].mf_err_add == mi_flt_stat[1].mf_err_add)) {
1926 #ifdef DEBUG
1927 		MC_LOG("discarding PTRL error because "
1928 		    "it is the same as MI\n");
1929 #endif
1930 		rsaddr->mi_valid = mi_valid;
1931 		return;
1932 	}
1933 	/* if not error mode, cntl1 is 0 */
1934 	if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1935 	    (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1936 		flt_stat[0].mf_cntl = 0;
1937 
1938 	if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1939 	    (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1940 		flt_stat[1].mf_cntl = 0;
1941 
1942 	mc_aflt.mflt_is_ptrl = 1;
1943 	rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]);
1944 }
1945 static int
1946 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt,
1947 	mc_flt_stat_t *flt_stat)
1948 {
1949 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1950 	int rv = 0;
1951 
1952 	mc_aflt->mflt_erpt_class = NULL;
1953 	if (IS_UE(flt_stat->mf_cntl, ptrl_error)) {
1954 		MC_LOG("UE detected\n");
1955 		flt_stat->mf_type = FLT_TYPE_UE;
1956 		mc_aflt->mflt_erpt_class = MC_OPL_UE;
1957 		mc_aflt->mflt_pr = PR_UE;
1958 		MAC_SET_ERRLOG_INFO(flt_stat);
1959 		rv = 1;
1960 	} else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) {
1961 		MC_LOG("CE detected\n");
1962 		MAC_SET_ERRLOG_INFO(flt_stat);
1963 
1964 		/* Error type can change after scrubbing */
1965 		mc_scrub_ce(mcp, bank, flt_stat, ptrl_error);
1966 		if (MC_REWRITE_ACTIVE(mcp, bank)) {
1967 			return (0);
1968 		}
1969 
1970 		if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) {
1971 			mc_aflt->mflt_erpt_class = MC_OPL_ICE;
1972 			mc_aflt->mflt_pr = PR_MCE;
1973 		} else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1974 			mc_aflt->mflt_erpt_class = MC_OPL_CE;
1975 			mc_aflt->mflt_pr = PR_MCE;
1976 		} else if (flt_stat->mf_type == FLT_TYPE_UE) {
1977 			mc_aflt->mflt_erpt_class = MC_OPL_UE;
1978 			mc_aflt->mflt_pr = PR_UE;
1979 		}
1980 		rv = 1;
1981 	}
1982 	MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type,
1983 	    mc_aflt->mflt_erpt_class);
1984 	if (mc_aflt->mflt_erpt_class) {
1985 		mc_aflt->mflt_stat[0] = flt_stat;
1986 		mc_aflt->mflt_nflts = 1;
1987 		mc_err_drain(mc_aflt);
1988 	}
1989 	return (rv);
1990 }
1991 
1992 static void
1993 mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
1994 {
1995 	mc_aflt_t mc_aflt;
1996 	mc_flt_stat_t flt_stat, mi_flt_stat;
1997 	int mi_valid;
1998 
1999 	bzero(&mc_aflt, sizeof (mc_aflt_t));
2000 	bzero(&flt_stat, sizeof (mc_flt_stat_t));
2001 	bzero(&mi_flt_stat, sizeof (mc_flt_stat_t));
2002 
2003 	mc_aflt.mflt_mcp = mcp;
2004 	mc_aflt.mflt_id = gethrtime();
2005 
2006 	/* patrol registers */
2007 	mc_read_ptrl_reg(mcp, bank, &flt_stat);
2008 
2009 	ASSERT(rsaddr);
2010 	rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr;
2011 
2012 	MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl,
2013 	    flt_stat.mf_err_add, flt_stat.mf_err_log);
2014 
2015 	/* MI registers */
2016 	mc_read_mi_reg(mcp, bank, &mi_flt_stat);
2017 
2018 
2019 	MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl,
2020 	    mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log);
2021 
2022 	/* clear errors once we read all the registers */
2023 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
2024 
2025 	mc_aflt.mflt_is_ptrl = 0;
2026 	if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) &&
2027 	    ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2028 	    ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2029 		mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat);
2030 	}
2031 
2032 	if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >>
2033 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl &
2034 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
2035 	    (flt_stat.mf_err_add == mi_flt_stat.mf_err_add)) {
2036 #ifdef DEBUG
2037 		MC_LOG("discarding PTRL error because "
2038 		    "it is the same as MI\n");
2039 #endif
2040 		rsaddr->mi_valid = mi_valid;
2041 		return;
2042 	}
2043 
2044 	mc_aflt.mflt_is_ptrl = 1;
2045 	if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) &&
2046 	    ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2047 	    ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2048 		rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt,
2049 		    &flt_stat);
2050 	}
2051 }
2052 /*
2053  *	memory patrol error handling algorithm:
2054  *	timeout() is used to do periodic polling
2055  *	This is the flow chart.
2056  *	timeout ->
2057  *	mc_check_errors()
2058  *	    if memory bank is installed, read the status register
2059  *	    if any error bit is set,
2060  *	    -> mc_error_handler()
2061  *		-> read all error registers
2062  *	        -> mc_process_error()
2063  *	            determine error type
2064  *	            rewrite to clear error or scrub to determine CE type
2065  *	            inform SCF on permanent CE
2066  *	        -> mc_err_drain
2067  *	            page offline processing
2068  *	            -> mc_ereport_post()
2069  */
2070 
2071 static void
2072 mc_process_rewrite(mc_opl_t *mcp, int bank)
2073 {
2074 	uint32_t rew_addr, cntl;
2075 	mc_retry_info_t *retry;
2076 	struct mc_bank *bankp;
2077 
2078 	bankp = &(mcp->mc_bank[bank]);
2079 	retry = bankp->mcb_active;
2080 	if (retry == NULL)
2081 		return;
2082 
2083 	if (retry->ri_state <= RETRY_STATE_ACTIVE) {
2084 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
2085 		if (cntl & MAC_CNTL_PTRL_STATUS)
2086 			return;
2087 		rew_addr = retry->ri_addr;
2088 		ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr);
2089 		MAC_REW_REQ(mcp, bank);
2090 
2091 		retry->ri_state = RETRY_STATE_REWRITE;
2092 	}
2093 
2094 	cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank));
2095 
2096 	if (cntl & MAC_CNTL_REW_END) {
2097 		MAC_CLEAR_ERRS(mcp, bank,
2098 		    MAC_CNTL_REW_ERRS);
2099 		mc_clear_rewrite(mcp, bank);
2100 	} else {
2101 		/*
2102 		 * If the rewrite does not complete in
2103 		 * 1 hour, we have to consider this a HW
2104 		 * failure.  However, there is no recovery
2105 		 * mechanism.  The only thing we can do
2106 		 * to to print a warning message to the
2107 		 * console.  We continue to increment the
2108 		 * counter but we only print the message
2109 		 * once.  It will take the counter a long
2110 		 * time to wrap around and the user might
2111 		 * see a second message.  In practice,
2112 		 * we have never hit this condition but
2113 		 * we have to keep the code here just in case.
2114 		 */
2115 		if (++mcp->mc_bank[bank].mcb_rewrite_count
2116 		    == mc_max_rewrite_retry) {
2117 			cmn_err(CE_WARN, "Memory patrol feature is"
2118 			" partly suspended on /LSB%d/B%d"
2119 			" due to heavy memory load,"
2120 			" and it will restart"
2121 			" automatically.\n", mcp->mc_board_num,
2122 			    bank);
2123 		}
2124 	}
2125 }
2126 
/*
 * mc_check_errors_func -- one polling pass over every installed bank of
 * a board.
 *
 * Does nothing for a board flagged MC_MEMORYLESS.  For each installed
 * bank it first advances any active rewrite request, then samples the
 * patrol status and control registers and takes one of three paths:
 *
 *   - patrol still running: manage the "speed up" window used to flush
 *     out MI errors that did not stop the patrol;
 *   - patrol stopped with PTRL/MI error bits set: run the (mirror or
 *     normal) error handler and restart the patrol;
 *   - patrol stopped without error bits: simply restart the patrol.
 *
 * On return mcp->mc_last_error has been increased by the number of
 * banks that went through the error handler in this pass, or reset to 0
 * if there were none.
 *
 * Both callers in this file (mc_polling() and mc_resume()) invoke this
 * with mcp->mc_lock held.
 */
static void
mc_check_errors_func(mc_opl_t *mcp)
{
	mc_rsaddr_info_t rsaddr_info;
	int i, error_count = 0;
	uint32_t stat, cntl;
	int running;
	int wrapped;
	int ebk;

	/*
	 * scan errors.
	 */
	if (mcp->mc_status & MC_MEMORYLESS)
		return;

	for (i = 0; i < BANKNUM_PER_SB; i++) {
		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
			if (MC_REWRITE_ACTIVE(mcp, i)) {
				mc_process_rewrite(mcp, i);
			}
			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
			running = cntl & MAC_CNTL_PTRL_START;
			wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX;

			/*
			 * Compute the effective bank idx: the per-bank
			 * counters (mc_period, mc_speedup_period) are indexed
			 * by MIRROR_IDX(i) when the bank is in mirror mode.
			 */
			ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i;

			if (mc_debug_show_all || stat) {
				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
				    mcp->mc_board_num, i, stat, cntl);
			}

			/*
			 * Update stats and reset flag if the HW patrol
			 * wrapped around in its scan.
			 */
			if (wrapped) {
				MAC_CLEAR_MAX(mcp, i);
				mcp->mc_period[ebk]++;
				if (IS_MIRROR(mcp, i)) {
					MC_LOG("mirror mc period %ld on "
					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
					    mcp->mc_board_num, i);
				} else {
					MC_LOG("mc period %ld on "
					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
					    mcp->mc_board_num, i);
				}
			}

			if (running) {
				/*
				 * Mac patrol HW is still running.
				 * Normally when an error is detected,
				 * the HW patrol will stop so that we
				 * can collect error data for reporting.
				 * Certain errors (MI errors) detected may not
				 * cause the HW patrol to stop which is a
				 * problem since we cannot read error data while
				 * the HW patrol is running. SW is not allowed
				 * to stop the HW patrol while it is running
				 * as it may cause HW inconsistency. This is
				 * described in a HW errata.
				 * In situations where we detected errors
				 * that may not cause the HW patrol to stop.
				 * We speed up the HW patrol scanning in
				 * the hope that it will find the 'real' PTRL
				 * errors associated with the previous errors
				 * causing the HW to finally stop so that we
				 * can do the reporting.
				 */
				/*
				 * Check to see if we did speed up
				 * the HW patrol due to previous errors
				 * detected that did not cause the patrol
				 * to stop. We only do it if HW patrol scan
				 * wrapped (counted as completing a 'period').
				 */
				if (mcp->mc_speedup_period[ebk] > 0) {
					if (wrapped &&
					    (--mcp->mc_speedup_period[ebk] ==
					    0)) {
						/*
						 * We did try to speed up.
						 * The speed up period has
						 * expired and the HW patrol
						 * is still running.  The
						 * errors must be intermittent.
						 * We have no choice but to
						 * ignore them, reset the scan
						 * speed to normal and clear
						 * the MI error bits. For
						 * mirror mode, we need to
						 * clear errors on both banks.
						 */
						MC_LOG("Clearing MI errors\n");
						MAC_CLEAR_ERRS(mcp, i,
						    MAC_CNTL_MI_ERRS);

						if (IS_MIRROR(mcp, i)) {
							MC_LOG("Clearing "
							    "Mirror MI errs\n");
							MAC_CLEAR_ERRS(mcp,
							    i^1,
							    MAC_CNTL_MI_ERRS);
						}
					}
				} else if (stat & MAC_STAT_MI_ERRS) {
					/*
					 * MI errors detected but we cannot
					 * report them since the HW patrol
					 * is still running.
					 * We will attempt to speed up the
					 * scanning and hopefully the HW
					 * can detect PRTL errors at the same
					 * location that cause the HW patrol
					 * to stop.
					 */
					/* allow two wrapped scans at speed 0 */
					mcp->mc_speedup_period[ebk] = 2;
					MAC_CMD(mcp, i, 0);
				}
			} else if (stat & (MAC_STAT_PTRL_ERRS |
			    MAC_STAT_MI_ERRS)) {
				/*
				 * HW Patrol has stopped and we found errors.
				 * Proceed to collect and report error info.
				 */
				mcp->mc_speedup_period[ebk] = 0;
				rsaddr_info.mi_valid = 0;
				rsaddr_info.mi_injectrestart = 0;
				if (IS_MIRROR(mcp, i)) {
					mc_error_handler_mir(mcp, i,
					    &rsaddr_info);
				} else {
					mc_error_handler(mcp, i, &rsaddr_info);
				}

				error_count++;
				restart_patrol(mcp, i, &rsaddr_info);
			} else {
				/*
				 * HW patrol scan has apparently stopped
				 * but no errors detected/flagged.
				 * Restart the HW patrol just to be sure.
				 * In mirror mode, the odd bank might have
				 * reported errors that caused the patrol to
				 * stop. We'll defer the restart to the odd
				 * bank in this case.
				 */
				if (!IS_MIRROR(mcp, i) || (i & 0x1))
					restart_patrol(mcp, i, NULL);
			}
		}
	}
	/* accumulate while errors keep showing up, reset on a clean pass */
	if (error_count > 0)
		mcp->mc_last_error += error_count;
	else
		mcp->mc_last_error = 0;
}
2288 
/*
 * mc_polling -- Check errors for only one instance per call,
 * but process the SCF logs of all instances to make sure we drain them
 * faster than they can accumulate.
 *
 * Each board should be polled only once per mc_patrol_interval_sec.
 * This is achieved by setting mc_tick_left to OPL_MAX_BOARDS after a
 * board has been polled and decrementing it by 1 on each timeout.
 * Once mc_tick_left has dropped to zero or below, the board becomes a
 * candidate for polling again because it has waited for at least
 * mc_patrol_interval_sec.  If mc_timeout_period is calculated
 * differently, this has to be updated accordingly.
 */
2302 
2303 static void
2304 mc_polling(void)
2305 {
2306 	int i, scan_error;
2307 	mc_opl_t *mcp;
2308 
2309 
2310 	scan_error = 1;
2311 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2312 		mutex_enter(&mcmutex);
2313 		if ((mcp = mc_instances[i]) == NULL) {
2314 			mutex_exit(&mcmutex);
2315 			continue;
2316 		}
2317 		mutex_enter(&mcp->mc_lock);
2318 		mutex_exit(&mcmutex);
2319 		if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2320 			mutex_exit(&mcp->mc_lock);
2321 			continue;
2322 		}
2323 		if (scan_error && mcp->mc_tick_left <= 0) {
2324 			mc_check_errors_func((void *)mcp);
2325 			mcp->mc_tick_left = OPL_MAX_BOARDS;
2326 			scan_error = 0;
2327 		} else {
2328 			mcp->mc_tick_left--;
2329 		}
2330 		mc_process_scf_log(mcp);
2331 		mutex_exit(&mcp->mc_lock);
2332 	}
2333 }
2334 
2335 static void
2336 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr)
2337 {
2338 	maddr->ma_bd = mcp->mc_board_num;
2339 	maddr->ma_bank = bank;
2340 	maddr->ma_dimm_addr = 0;
2341 }
2342 
/*
 * Layout in which get_base_address() reads the first entry of the
 * "sb-mem-ranges" property.
 */
typedef struct mc_mem_range {
	uint64_t	addr;	/* copied into mc_start_address */
	uint64_t	size;	/* copied into mc_size */
} mc_mem_range_t;
2347 
2348 static int
2349 get_base_address(mc_opl_t *mcp)
2350 {
2351 	mc_mem_range_t *mem_range;
2352 	int len;
2353 
2354 	if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2355 	    "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) {
2356 		return (DDI_FAILURE);
2357 	}
2358 
2359 	mcp->mc_start_address = mem_range->addr;
2360 	mcp->mc_size = mem_range->size;
2361 
2362 	kmem_free(mem_range, len);
2363 	return (DDI_SUCCESS);
2364 }
2365 
/*
 * One entry of the "mc-addr" property as read by mc_board_add().
 */
struct mc_addr_spec {
	uint32_t bank;		/* index into mcp->mc_bank[] */
	uint32_t phys_hi;	/* upper 32 bits of the register base PA */
	uint32_t phys_lo;	/* lower 32 bits of the register base PA */
};
2371 
/*
 * Build the 64-bit register base address of entry i of the mc_addr_spec
 * array m from its phys_hi/phys_lo halves.  Both arguments are
 * parenthesized so that expressions may be passed safely.
 */
#define	REGS_PA(m, i)	\
	((((uint64_t)(m)[(i)].phys_hi) << 32) | (m)[(i)].phys_lo)
2373 
/*
 * Property names of the translation tables that mc_board_add() loads
 * into mcp->mc_trans_table[], in the same index order.
 */
static char *mc_tbl_name[] = {
	"cs0-mc-pa-trans-table",
	"cs1-mc-pa-trans-table"
};
2378 
2379 /*
2380  * This routine performs a rangecheck for a given PA
2381  * to see if it belongs to the memory range for this board.
2382  * Return 1 if it is valid (within the range) and 0 otherwise
2383  */
2384 static int
2385 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa)
2386 {
2387 	if ((pa < mcp->mc_start_address) || (mcp->mc_start_address +
2388 	    mcp->mc_size <= pa))
2389 		return (0);
2390 	else
2391 		return (1);
2392 }
2393 
2394 static void
2395 mc_memlist_delete(struct memlist *mlist)
2396 {
2397 	struct memlist *ml;
2398 
2399 	for (ml = mlist; ml; ml = mlist) {
2400 		mlist = ml->next;
2401 		kmem_free(ml, sizeof (struct memlist));
2402 	}
2403 }
2404 
2405 static struct memlist *
2406 mc_memlist_dup(struct memlist *mlist)
2407 {
2408 	struct memlist *hl = NULL, *tl, **mlp;
2409 
2410 	if (mlist == NULL)
2411 		return (NULL);
2412 
2413 	mlp = &hl;
2414 	tl = *mlp;
2415 	for (; mlist; mlist = mlist->next) {
2416 		*mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP);
2417 		(*mlp)->address = mlist->address;
2418 		(*mlp)->size = mlist->size;
2419 		(*mlp)->prev = tl;
2420 		tl = *mlp;
2421 		mlp = &((*mlp)->next);
2422 	}
2423 	*mlp = NULL;
2424 
2425 	return (hl);
2426 }
2427 
2428 
/*
 * mc_memlist_del_span -- remove the span [base, base + len) from a
 * memlist, editing the list in place.
 *
 * Entries that overlap the span are trimmed at the front or the back,
 * split in two when the span falls strictly inside them, and unlinked
 * and freed when nothing of them remains.  The walk stops at the first
 * entry that starts at or beyond the end of the span, so the list is
 * assumed to be sorted by ascending address.
 *
 * Returns the (possibly different) head of the list, or NULL if the
 * list was NULL or every entry was removed.
 */
static struct memlist *
mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
{
	uint64_t	end;
	struct memlist	*ml, *tl, *nlp;

	if (mlist == NULL)
		return (NULL);

	/* nothing to do for an empty span or one ending before the list */
	end = base + len;
	if ((end <= mlist->address) || (base == end))
		return (mlist);

	/* tl trails ml by one entry; nlp is the entry visited next */
	for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
		uint64_t	mend;

		nlp = ml->next;

		if (end <= ml->address)
			break;

		mend = ml->address + ml->size;
		if (base < mend) {
			if (base <= ml->address) {
				/* span covers the front of this entry */
				ml->address = end;
				if (end >= mend)
					ml->size = 0ull;
				else
					ml->size = mend - ml->address;
			} else {
				/* span starts inside: keep the front part */
				ml->size = base - ml->address;
				if (end < mend) {
					struct memlist	*nl;
					/*
					 * splitting an memlist entry.
					 */
					nl = kmem_alloc(sizeof (struct memlist),
					    KM_SLEEP);
					nl->address = end;
					nl->size = mend - nl->address;
					if ((nl->next = nlp) != NULL)
						nlp->prev = nl;
					nl->prev = ml;
					ml->next = nl;
					nlp = nl;
				}
			}
			if (ml->size == 0ull) {
				/* entry fully consumed: unlink and free it */
				if (ml == mlist) {
					if ((mlist = nlp) != NULL)
						nlp->prev = NULL;
					kmem_free(ml, sizeof (struct memlist));
					if (mlist == NULL)
						break;
					ml = nlp;
				} else {
					if ((tl->next = nlp) != NULL)
						nlp->prev = tl;
					kmem_free(ml, sizeof (struct memlist));
					/* step back so tl stays put */
					ml = tl;
				}
			}
		}
	}

	return (mlist);
}
2496 
2497 static void
2498 mc_get_mlist(mc_opl_t *mcp)
2499 {
2500 	struct memlist *mlist;
2501 
2502 	memlist_read_lock();
2503 	mlist = mc_memlist_dup(phys_install);
2504 	memlist_read_unlock();
2505 
2506 	if (mlist) {
2507 		mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address);
2508 	}
2509 
2510 	if (mlist) {
2511 		uint64_t startpa, endpa;
2512 
2513 		startpa = mcp->mc_start_address + mcp->mc_size;
2514 		endpa = ptob(physmax + 1);
2515 		if (endpa > startpa) {
2516 			mlist = mc_memlist_del_span(mlist, startpa,
2517 			    endpa - startpa);
2518 		}
2519 	}
2520 
2521 	if (mlist) {
2522 		mcp->mlist = mlist;
2523 	}
2524 }
2525 
2526 int
2527 mc_board_add(mc_opl_t *mcp)
2528 {
2529 	struct mc_addr_spec *macaddr;
2530 	cs_status_t *cs_status;
2531 	int len, len1, i, bk, cc;
2532 	mc_rsaddr_info_t rsaddr;
2533 	uint32_t mirr;
2534 	int nbanks = 0;
2535 	uint64_t nbytes = 0;
2536 	int mirror_mode = 0;
2537 	int ret;
2538 
2539 	/*
2540 	 * Get configurations from "pseudo-mc" node which includes:
2541 	 * board# : LSB number
2542 	 * mac-addr : physical base address of MAC registers
2543 	 * csX-mac-pa-trans-table: translation table from DIMM address
2544 	 *			to physical address or vice versa.
2545 	 */
2546 	mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip,
2547 	    DDI_PROP_DONTPASS, "board#", -1);
2548 
2549 	if (mcp->mc_board_num == -1) {
2550 		return (DDI_FAILURE);
2551 	}
2552 
2553 	/*
2554 	 * Get start address in this CAB. It can be gotten from
2555 	 * "sb-mem-ranges" property.
2556 	 */
2557 
2558 	if (get_base_address(mcp) == DDI_FAILURE) {
2559 		return (DDI_FAILURE);
2560 	}
2561 	/* get mac-pa trans tables */
2562 	for (i = 0; i < MC_TT_CS; i++) {
2563 		len = MC_TT_ENTRIES;
2564 		cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip,
2565 		    DDI_PROP_DONTPASS, mc_tbl_name[i],
2566 		    (caddr_t)mcp->mc_trans_table[i], &len);
2567 
2568 		if (cc != DDI_SUCCESS) {
2569 			bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES);
2570 		}
2571 	}
2572 	mcp->mlist = NULL;
2573 
2574 	mc_get_mlist(mcp);
2575 
2576 	/* initialize bank informations */
2577 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2578 	    "mc-addr", (caddr_t)&macaddr, &len);
2579 	if (cc != DDI_SUCCESS) {
2580 		cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc);
2581 		return (DDI_FAILURE);
2582 	}
2583 
2584 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2585 	    "cs-status", (caddr_t)&cs_status, &len1);
2586 
2587 	if (cc != DDI_SUCCESS) {
2588 		if (len > 0)
2589 			kmem_free(macaddr, len);
2590 		cmn_err(CE_WARN, "Cannot get cs-status. err=%d\n", cc);
2591 		return (DDI_FAILURE);
2592 	}
2593 	/* get the physical board number for a given logical board number */
2594 	mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num);
2595 
2596 	if (mcp->mc_phys_board_num < 0) {
2597 		if (len > 0)
2598 			kmem_free(macaddr, len);
2599 		cmn_err(CE_WARN, "Unable to obtain the physical board number");
2600 		return (DDI_FAILURE);
2601 	}
2602 
2603 	mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL);
2604 
2605 	for (i = 0; i < len1 / sizeof (cs_status_t); i++) {
2606 		nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) |
2607 		    ((uint64_t)cs_status[i].cs_avail_low);
2608 	}
2609 	if (len1 > 0)
2610 		kmem_free(cs_status, len1);
2611 	nbanks = len / sizeof (struct mc_addr_spec);
2612 
2613 	if (nbanks > 0)
2614 		nbytes /= nbanks;
2615 	else {
2616 		/* No need to free macaddr because len must be 0 */
2617 		mcp->mc_status |= MC_MEMORYLESS;
2618 		return (DDI_SUCCESS);
2619 	}
2620 
2621 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2622 		mcp->mc_scf_retry[i] = 0;
2623 		mcp->mc_period[i] = 0;
2624 		mcp->mc_speedup_period[i] = 0;
2625 	}
2626 
2627 	/*
2628 	 * Get the memory size here. Let it be B (bytes).
2629 	 * Let T be the time in u.s. to scan 64 bytes.
2630 	 * If we want to complete 1 round of scanning in P seconds.
2631 	 *
2632 	 *	B * T * 10^(-6)	= P
2633 	 *	---------------
2634 	 *		64
2635 	 *
2636 	 *	T = P * 64 * 10^6
2637 	 *	    -------------
2638 	 *		B
2639 	 *
2640 	 *	  = P * 64 * 10^6
2641 	 *	    -------------
2642 	 *		B
2643 	 *
2644 	 *	The timing bits are set in PTRL_CNTL[28:26] where
2645 	 *
2646 	 *	0	- 1 m.s
2647 	 *	1	- 512 u.s.
2648 	 *	10	- 256 u.s.
2649 	 *	11	- 128 u.s.
2650 	 *	100	- 64 u.s.
2651 	 *	101	- 32 u.s.
2652 	 *	110	- 0 u.s.
2653 	 *	111	- reserved.
2654 	 *
2655 	 *
2656 	 *	a[0] = 110, a[1] = 101, ... a[6] = 0
2657 	 *
2658 	 *	cs-status property is int x 7
2659 	 *	0 - cs#
2660 	 *	1 - cs-status
2661 	 *	2 - cs-avail.hi
2662 	 *	3 - cs-avail.lo
2663 	 *	4 - dimm-capa.hi
2664 	 *	5 - dimm-capa.lo
2665 	 *	6 - #of dimms
2666 	 */
2667 
2668 	if (nbytes > 0) {
2669 		int i;
2670 		uint64_t ms;
2671 		ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes;
2672 		mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds;
2673 		for (i = 0; i < MC_MAX_SPEEDS - 1; i++) {
2674 			if (ms < mc_scan_speeds[i + 1].mc_period) {
2675 				mcp->mc_speed = mc_scan_speeds[i].mc_speeds;
2676 				break;
2677 			}
2678 		}
2679 	} else
2680 		mcp->mc_speed = 0;
2681 
2682 
2683 	for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) {
2684 		struct mc_bank *bankp;
2685 		mc_retry_info_t *retry;
2686 		uint32_t reg;
2687 		int k;
2688 
2689 		/*
2690 		 * setup bank
2691 		 */
2692 		bk = macaddr[i].bank;
2693 		bankp = &(mcp->mc_bank[bk]);
2694 		bankp->mcb_status = BANK_INSTALLED;
2695 		bankp->mcb_reg_base = REGS_PA(macaddr, i);
2696 
2697 		bankp->mcb_retry_freelist = NULL;
2698 		bankp->mcb_retry_pending = NULL;
2699 		bankp->mcb_active = NULL;
2700 		retry = &bankp->mcb_retry_infos[0];
2701 		for (k = 0; k < MC_RETRY_COUNT; k++, retry++) {
2702 			mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
2703 		}
2704 
2705 		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk));
2706 		bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS);
2707 
2708 		/*
2709 		 * check if mirror mode
2710 		 */
2711 		mirr = LD_MAC_REG(MAC_MIRR(mcp, bk));
2712 
2713 		if (mirr & MAC_MIRR_MIRROR_MODE) {
2714 			MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num,
2715 			    bk);
2716 			bankp->mcb_status |= BANK_MIRROR_MODE;
2717 			mirror_mode = 1;
2718 			/*
2719 			 * The following bit is only used for
2720 			 * error injection.  We should clear it
2721 			 */
2722 			if (mirr & MAC_MIRR_BANK_EXCLUSIVE)
2723 				ST_MAC_REG(MAC_MIRR(mcp, bk), 0);
2724 		}
2725 
2726 		/*
2727 		 * restart if not mirror mode or the other bank
2728 		 * of the mirror is not running
2729 		 */
2730 		if (!(mirr & MAC_MIRR_MIRROR_MODE) ||
2731 		    !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) {
2732 			MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num,
2733 			    bk);
2734 			get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr);
2735 			rsaddr.mi_valid = 0;
2736 			rsaddr.mi_injectrestart = 0;
2737 			restart_patrol(mcp, bk, &rsaddr);
2738 		} else {
2739 			MC_LOG("Not starting up /LSB%d/B%d\n",
2740 			    mcp->mc_board_num, bk);
2741 		}
2742 		bankp->mcb_status |= BANK_PTRL_RUNNING;
2743 	}
2744 	if (len > 0)
2745 		kmem_free(macaddr, len);
2746 
2747 	ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode",
2748 	    mirror_mode);
2749 	if (ret != DDI_PROP_SUCCESS) {
2750 		cmn_err(CE_WARN, "Unable to update mirror-mode property");
2751 	}
2752 
2753 	mcp->mc_dimm_list = mc_get_dimm_list(mcp);
2754 
2755 	/*
2756 	 * set interval in HZ.
2757 	 */
2758 	mcp->mc_last_error = 0;
2759 
2760 	/* restart memory patrol checking */
2761 	mcp->mc_status |= MC_POLL_RUNNING;
2762 
2763 	return (DDI_SUCCESS);
2764 }
2765 
2766 int
2767 mc_board_del(mc_opl_t *mcp)
2768 {
2769 	int i;
2770 	scf_log_t *p;
2771 
2772 	/*
2773 	 * cleanup mac state
2774 	 */
2775 	mutex_enter(&mcp->mc_lock);
2776 	if (mcp->mc_status & MC_MEMORYLESS) {
2777 		mutex_exit(&mcp->mc_lock);
2778 		mutex_destroy(&mcp->mc_lock);
2779 		return (DDI_SUCCESS);
2780 	}
2781 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2782 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2783 			mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED;
2784 		}
2785 	}
2786 
2787 	/* stop memory patrol checking */
2788 	mcp->mc_status &= ~MC_POLL_RUNNING;
2789 
2790 	/* just throw away all the scf logs */
2791 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2792 		while ((p = mcp->mc_scf_log[i]) != NULL) {
2793 			mcp->mc_scf_log[i] = p->sl_next;
2794 			mcp->mc_scf_total[i]--;
2795 			kmem_free(p, sizeof (scf_log_t));
2796 		}
2797 	}
2798 
2799 	if (mcp->mlist)
2800 		mc_memlist_delete(mcp->mlist);
2801 
2802 	if (mcp->mc_dimm_list)
2803 		mc_free_dimm_list(mcp->mc_dimm_list);
2804 
2805 	mutex_exit(&mcp->mc_lock);
2806 
2807 	mutex_destroy(&mcp->mc_lock);
2808 	return (DDI_SUCCESS);
2809 }
2810 
2811 int
2812 mc_suspend(mc_opl_t *mcp, uint32_t flag)
2813 {
2814 	/* stop memory patrol checking */
2815 	mutex_enter(&mcp->mc_lock);
2816 	if (mcp->mc_status & MC_MEMORYLESS) {
2817 		mutex_exit(&mcp->mc_lock);
2818 		return (DDI_SUCCESS);
2819 	}
2820 
2821 	mcp->mc_status &= ~MC_POLL_RUNNING;
2822 
2823 	mcp->mc_status |= flag;
2824 	mutex_exit(&mcp->mc_lock);
2825 
2826 	return (DDI_SUCCESS);
2827 }
2828 
2829 void
2830 opl_mc_update_mlist(void)
2831 {
2832 	int i;
2833 	mc_opl_t *mcp;
2834 
2835 	/*
2836 	 * memory information is not updated until
2837 	 * the post attach/detach stage during DR.
2838 	 * This interface is used by dr_mem to inform
2839 	 * mc-opl to update the mlist.
2840 	 */
2841 
2842 	mutex_enter(&mcmutex);
2843 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2844 		if ((mcp = mc_instances[i]) == NULL)
2845 			continue;
2846 		mutex_enter(&mcp->mc_lock);
2847 		if (mcp->mlist)
2848 			mc_memlist_delete(mcp->mlist);
2849 		mcp->mlist = NULL;
2850 		mc_get_mlist(mcp);
2851 		mutex_exit(&mcp->mc_lock);
2852 	}
2853 	mutex_exit(&mcmutex);
2854 }
2855 
2856 /* caller must clear the SUSPEND bits or this will do nothing */
2857 
2858 int
2859 mc_resume(mc_opl_t *mcp, uint32_t flag)
2860 {
2861 	int i;
2862 	uint64_t basepa;
2863 
2864 	mutex_enter(&mcp->mc_lock);
2865 	if (mcp->mc_status & MC_MEMORYLESS) {
2866 		mutex_exit(&mcp->mc_lock);
2867 		return (DDI_SUCCESS);
2868 	}
2869 	basepa = mcp->mc_start_address;
2870 	if (get_base_address(mcp) == DDI_FAILURE) {
2871 		mutex_exit(&mcp->mc_lock);
2872 		return (DDI_FAILURE);
2873 	}
2874 
2875 	if (basepa != mcp->mc_start_address) {
2876 		if (mcp->mlist)
2877 			mc_memlist_delete(mcp->mlist);
2878 		mcp->mlist = NULL;
2879 		mc_get_mlist(mcp);
2880 	}
2881 
2882 	mcp->mc_status &= ~flag;
2883 
2884 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
2885 		mutex_exit(&mcp->mc_lock);
2886 		return (DDI_SUCCESS);
2887 	}
2888 
2889 	if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2890 		/* restart memory patrol checking */
2891 		mcp->mc_status |= MC_POLL_RUNNING;
2892 		for (i = 0; i < BANKNUM_PER_SB; i++) {
2893 			if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2894 				mc_check_errors_func(mcp);
2895 			}
2896 		}
2897 	}
2898 	mutex_exit(&mcp->mc_lock);
2899 
2900 	return (DDI_SUCCESS);
2901 }
2902 
2903 static mc_opl_t *
2904 mc_pa_to_mcp(uint64_t pa)
2905 {
2906 	mc_opl_t *mcp;
2907 	int i;
2908 
2909 	ASSERT(MUTEX_HELD(&mcmutex));
2910 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2911 		if ((mcp = mc_instances[i]) == NULL)
2912 			continue;
2913 		/* if mac patrol is suspended, we cannot rely on it */
2914 		if (!(mcp->mc_status & MC_POLL_RUNNING) ||
2915 		    (mcp->mc_status & MC_SOFT_SUSPENDED))
2916 			continue;
2917 		if (mc_rangecheck_pa(mcp, pa)) {
2918 			return (mcp);
2919 		}
2920 	}
2921 	return (NULL);
2922 }
2923 
2924 /*
2925  * Get Physical Board number from Logical one.
2926  */
2927 static int
2928 mc_opl_get_physical_board(int sb)
2929 {
2930 	if (&opl_get_physical_board) {
2931 		return (opl_get_physical_board(sb));
2932 	}
2933 
2934 	cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n");
2935 	return (-1);
2936 }
2937 
2938 /* ARGSUSED */
2939 int
2940 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen,
2941 	int *lenp)
2942 {
2943 	int i;
2944 	int j;
2945 	int sb;
2946 	int bank;
2947 	int cs;
2948 	int rv = 0;
2949 	mc_opl_t *mcp;
2950 	char memb_num;
2951 
2952 	mutex_enter(&mcmutex);
2953 
2954 	if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) ||
2955 	    (!pa_is_valid(mcp, flt_addr))) {
2956 		mutex_exit(&mcmutex);
2957 		if (snprintf(buf, buflen, "UNKNOWN") >= buflen) {
2958 			return (ENOSPC);
2959 		} else {
2960 			if (lenp)
2961 				*lenp = strlen(buf);
2962 		}
2963 		return (0);
2964 	}
2965 
2966 	bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address);
2967 	sb = mcp->mc_phys_board_num;
2968 	cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address);
2969 
2970 	if (sb == -1) {
2971 		mutex_exit(&mcmutex);
2972 		return (ENXIO);
2973 	}
2974 
2975 	switch (plat_model) {
2976 	case MODEL_DC:
2977 		i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
2978 		j = (cs == 0) ? i : i + 2;
2979 		snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
2980 		    model_names[plat_model].unit_name, sb,
2981 		    mc_dc_dimm_unum_table[j],
2982 		    mc_dc_dimm_unum_table[j + 1]);
2983 		break;
2984 	case MODEL_FF2:
2985 	case MODEL_FF1:
2986 		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
2987 		j = (cs == 0) ? i : i + 2;
2988 		memb_num = mc_ff_dimm_unum_table[i][0];
2989 		snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
2990 		    model_names[plat_model].unit_name,
2991 		    model_names[plat_model].mem_name, memb_num,
2992 		    &mc_ff_dimm_unum_table[j][1],
2993 		    &mc_ff_dimm_unum_table[j + 1][1]);
2994 		break;
2995 	case MODEL_IKKAKU:
2996 		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
2997 		j = (cs == 0) ? i : i + 2;
2998 		snprintf(buf, buflen, "/%s/MEM%s MEM%s",
2999 		    model_names[plat_model].unit_name,
3000 		    &mc_ff_dimm_unum_table[j][1],
3001 		    &mc_ff_dimm_unum_table[j + 1][1]);
3002 		break;
3003 	default:
3004 		rv = ENXIO;
3005 	}
3006 	if (lenp) {
3007 		*lenp = strlen(buf);
3008 	}
3009 	mutex_exit(&mcmutex);
3010 	return (rv);
3011 }
3012 
3013 int
3014 opl_mc_suspend(void)
3015 {
3016 	mc_opl_t *mcp;
3017 	int i;
3018 
3019 	mutex_enter(&mcmutex);
3020 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3021 		if ((mcp = mc_instances[i]) == NULL)
3022 			continue;
3023 		mc_suspend(mcp, MC_SOFT_SUSPENDED);
3024 	}
3025 	mutex_exit(&mcmutex);
3026 
3027 	return (0);
3028 }
3029 
3030 int
3031 opl_mc_resume(void)
3032 {
3033 	mc_opl_t *mcp;
3034 	int i;
3035 
3036 	mutex_enter(&mcmutex);
3037 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3038 		if ((mcp = mc_instances[i]) == NULL)
3039 			continue;
3040 		mc_resume(mcp, MC_SOFT_SUSPENDED);
3041 	}
3042 	mutex_exit(&mcmutex);
3043 
3044 	return (0);
3045 }
3046 static void
3047 insert_mcp(mc_opl_t *mcp)
3048 {
3049 	mutex_enter(&mcmutex);
3050 	if (mc_instances[mcp->mc_board_num] != NULL) {
3051 		MC_LOG("mc-opl instance for board# %d already exists\n",
3052 		    mcp->mc_board_num);
3053 	}
3054 	mc_instances[mcp->mc_board_num] = mcp;
3055 	mutex_exit(&mcmutex);
3056 }
3057 
3058 static void
3059 delete_mcp(mc_opl_t *mcp)
3060 {
3061 	mutex_enter(&mcmutex);
3062 	mc_instances[mcp->mc_board_num] = 0;
3063 	mutex_exit(&mcmutex);
3064 }
3065 
3066 /* Error injection interface */
3067 
/*
 * mc_lock_va -- install a locked kernel data TLB entry that maps new_va
 * to the page containing pa.
 *
 * Any existing translation for new_va is flushed first; then an 8K TTE
 * for pa's page frame is built, marked with TTE_LCK_INT and loaded with
 * sfmmu_dtlb_ld_kva().  The caller in this file brackets the use of the
 * mapping with kpreempt_disable()/kpreempt_enable() and removes it with
 * mc_unlock_va().
 */
static void
mc_lock_va(uint64_t pa, caddr_t new_va)
{
	tte_t tte;

	/* drop whatever is currently mapped at new_va */
	vtag_flushpage(new_va, (uint64_t)ksfmmup);
	sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K);
	/* set the lock bit in the entry before loading it */
	tte.tte_intlo |= TTE_LCK_INT;
	sfmmu_dtlb_ld_kva(new_va, &tte);
}
3078 
/*
 * mc_unlock_va -- counterpart of mc_lock_va(): flush the translation
 * for va again.
 */
static void
mc_unlock_va(caddr_t va)
{
	vtag_flushpage(va, (uint64_t)ksfmmup);
}
3084 
3085 /* ARGSUSED */
3086 int
3087 mc_inject_error(int error_type, uint64_t pa, uint32_t flags)
3088 {
3089 	mc_opl_t *mcp;
3090 	int bank;
3091 	uint32_t dimm_addr;
3092 	uint32_t cntl;
3093 	mc_rsaddr_info_t rsaddr;
3094 	uint32_t data, stat;
3095 	int both_sides = 0;
3096 	uint64_t pa0;
3097 	int extra_injection_needed = 0;
3098 	extern void cpu_flush_ecache(void);
3099 
3100 	MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags);
3101 
3102 	mutex_enter(&mcmutex);
3103 	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
3104 		mutex_exit(&mcmutex);
3105 		MC_LOG("mc_inject_error: invalid pa\n");
3106 		return (ENOTSUP);
3107 	}
3108 
3109 	mutex_enter(&mcp->mc_lock);
3110 	mutex_exit(&mcmutex);
3111 
3112 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
3113 		mutex_exit(&mcp->mc_lock);
3114 		MC_LOG("mc-opl has been suspended.  No error injection.\n");
3115 		return (EBUSY);
3116 	}
3117 
3118 	/* convert pa to offset within the board */
3119 	MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address);
3120 
3121 	if (!pa_is_valid(mcp, pa)) {
3122 		mutex_exit(&mcp->mc_lock);
3123 		return (EINVAL);
3124 	}
3125 
3126 	pa0 = pa - mcp->mc_start_address;
3127 
3128 	bank = pa_to_bank(mcp, pa0);
3129 
3130 	if (flags & MC_INJECT_FLAG_OTHER)
3131 		bank = bank ^ 1;
3132 
3133 	if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) {
3134 		mutex_exit(&mcp->mc_lock);
3135 		MC_LOG("Not mirror mode\n");
3136 		return (EINVAL);
3137 	}
3138 
3139 	dimm_addr = pa_to_dimm(mcp, pa0);
3140 
3141 	MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank,
3142 	    dimm_addr);
3143 
3144 
3145 	switch (error_type) {
3146 	case MC_INJECT_INTERMITTENT_MCE:
3147 	case MC_INJECT_PERMANENT_MCE:
3148 	case MC_INJECT_MUE:
3149 		both_sides = 1;
3150 	}
3151 
3152 	if (flags & MC_INJECT_FLAG_RESET)
3153 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0);
3154 
3155 	ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK);
3156 
3157 	if (both_sides) {
3158 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0);
3159 		ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr &
3160 		    MAC_EG_ADD_MASK);
3161 	}
3162 
3163 	switch (error_type) {
3164 	case MC_INJECT_SUE:
3165 		extra_injection_needed = 1;
3166 		/*FALLTHROUGH*/
3167 	case MC_INJECT_UE:
3168 	case MC_INJECT_MUE:
3169 		if (flags & MC_INJECT_FLAG_PATH) {
3170 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3171 			    MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE;
3172 		} else {
3173 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 |
3174 			    MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE;
3175 		}
3176 		flags |= MC_INJECT_FLAG_ST;
3177 		break;
3178 	case MC_INJECT_INTERMITTENT_CE:
3179 	case MC_INJECT_INTERMITTENT_MCE:
3180 		if (flags & MC_INJECT_FLAG_PATH) {
3181 			cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 |
3182 			    MAC_EG_RDERR_ONCE;
3183 		} else {
3184 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3185 			    MAC_EG_DERR_ONCE;
3186 		}
3187 		extra_injection_needed = 1;
3188 		flags |= MC_INJECT_FLAG_ST;
3189 		break;
3190 	case MC_INJECT_PERMANENT_CE:
3191 	case MC_INJECT_PERMANENT_MCE:
3192 		if (flags & MC_INJECT_FLAG_PATH) {
3193 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3194 			    MAC_EG_RDERR_ALWAYS;
3195 		} else {
3196 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3197 			    MAC_EG_DERR_ALWAYS;
3198 		}
3199 		flags |= MC_INJECT_FLAG_ST;
3200 		break;
3201 	case MC_INJECT_CMPE:
3202 		data = 0xabcdefab;
3203 		stphys(pa, data);
3204 		cpu_flush_ecache();
3205 		MC_LOG("CMPE: writing data %x to %lx\n", data, pa);
3206 		ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE);
3207 		stphys(pa, data ^ 0xffffffff);
3208 		membar_sync();
3209 		cpu_flush_ecache();
3210 		ST_MAC_REG(MAC_MIRR(mcp, bank), 0);
3211 		MC_LOG("CMPE: write new data %xto %lx\n", data, pa);
3212 		cntl = 0;
3213 		break;
3214 	case MC_INJECT_NOP:
3215 		cntl = 0;
3216 		break;
3217 	default:
3218 		MC_LOG("mc_inject_error: invalid option\n");
3219 		cntl = 0;
3220 	}
3221 
3222 	if (cntl) {
3223 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK);
3224 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3225 
3226 		if (both_sides) {
3227 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3228 			    MAC_EG_SETUP_MASK);
3229 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3230 		}
3231 	}
3232 
3233 	/*
3234 	 * For all injection cases except compare error, we
3235 	 * must write to the PA to trigger the error.
3236 	 */
3237 
3238 	if (flags & MC_INJECT_FLAG_ST) {
3239 		data = 0xf0e0d0c0;
3240 		MC_LOG("Writing %x to %lx\n", data, pa);
3241 		stphys(pa, data);
3242 		cpu_flush_ecache();
3243 	}
3244 
3245 
3246 	if (flags & MC_INJECT_FLAG_LD) {
3247 		if (flags & MC_INJECT_FLAG_PREFETCH) {
3248 			/*
3249 			 * Use strong prefetch operation to
3250 			 * inject MI errors.
3251 			 */
3252 			page_t *pp;
3253 			extern void mc_prefetch(caddr_t);
3254 
3255 			MC_LOG("prefetch\n");
3256 
3257 			pp = page_numtopp_nolock(pa >> PAGESHIFT);
3258 			if (pp != NULL) {
3259 				caddr_t	va, va1;
3260 
3261 				va = ppmapin(pp, PROT_READ|PROT_WRITE,
3262 				    (caddr_t)-1);
3263 				kpreempt_disable();
3264 				mc_lock_va((uint64_t)pa, va);
3265 				va1 = va + (pa & (PAGESIZE - 1));
3266 				mc_prefetch(va1);
3267 				mc_unlock_va(va);
3268 				kpreempt_enable();
3269 				ppmapout(va);
3270 
3271 				/*
3272 				 * For MI errors, we need one extra
3273 				 * injection for HW patrol to stop.
3274 				 */
3275 				extra_injection_needed = 1;
3276 			} else {
3277 				cmn_err(CE_WARN, "Cannot find page structure"
3278 				    " for PA %lx\n", pa);
3279 			}
3280 		} else {
3281 			MC_LOG("Reading from %lx\n", pa);
3282 			data = ldphys(pa);
3283 			MC_LOG("data = %x\n", data);
3284 		}
3285 
3286 		if (extra_injection_needed) {
3287 			/*
3288 			 * These are the injection cases where the
3289 			 * requested injected errors will not cause the HW
3290 			 * patrol to stop. For these cases, we need to inject
3291 			 * an extra 'real' PTRL error to force the
3292 			 * HW patrol to stop so that we can report the
3293 			 * errors injected. Note that we cannot read
3294 			 * and report error status while the HW patrol
3295 			 * is running.
3296 			 */
3297 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank),
3298 			    cntl & MAC_EG_SETUP_MASK);
3299 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3300 
3301 			if (both_sides) {
3302 				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3303 				    MAC_EG_SETUP_MASK);
3304 				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3305 			}
3306 			data = 0xf0e0d0c0;
3307 			MC_LOG("Writing %x to %lx\n", data, pa);
3308 			stphys(pa, data);
3309 			cpu_flush_ecache();
3310 		}
3311 	}
3312 
3313 	if (flags & MC_INJECT_FLAG_RESTART) {
3314 		MC_LOG("Restart patrol\n");
3315 		rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num;
3316 		rsaddr.mi_restartaddr.ma_bank = bank;
3317 		rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr;
3318 		rsaddr.mi_valid = 1;
3319 		rsaddr.mi_injectrestart = 1;
3320 		restart_patrol(mcp, bank, &rsaddr);
3321 	}
3322 
3323 	if (flags & MC_INJECT_FLAG_POLL) {
3324 		int running;
3325 		int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
3326 
3327 		MC_LOG("Poll patrol error\n");
3328 		stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank));
3329 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
3330 		running = cntl & MAC_CNTL_PTRL_START;
3331 
3332 		if (!running &&
3333 		    (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) {
3334 			/*
3335 			 * HW patrol stopped and we have errors to
3336 			 * report. Do it.
3337 			 */
3338 			mcp->mc_speedup_period[ebank] = 0;
3339 			rsaddr.mi_valid = 0;
3340 			rsaddr.mi_injectrestart = 0;
3341 			if (IS_MIRROR(mcp, bank)) {
3342 				mc_error_handler_mir(mcp, bank, &rsaddr);
3343 			} else {
3344 				mc_error_handler(mcp, bank, &rsaddr);
3345 			}
3346 
3347 			restart_patrol(mcp, bank, &rsaddr);
3348 		} else {
3349 			/*
3350 			 * We are expecting to report injected
3351 			 * errors but the HW patrol is still running.
3352 			 * Speed up the scanning
3353 			 */
3354 			mcp->mc_speedup_period[ebank] = 2;
3355 			MAC_CMD(mcp, bank, 0);
3356 			restart_patrol(mcp, bank, NULL);
3357 		}
3358 	}
3359 
3360 	mutex_exit(&mcp->mc_lock);
3361 	return (0);
3362 }
3363 
/*
 * mc_stphysio -- store a 32-bit value at the given physical I/O address,
 * then load the same location again and log what came back.
 */
void
mc_stphysio(uint64_t pa, uint32_t data)
{
	uint32_t readback;

	MC_LOG("0x%x -> pa(%lx)\n", data, pa);
	stphysio(pa, data);

	/*
	 * Read the location back; this forces the store above to be
	 * processed by the mac patrol.
	 */
	readback = ldphysio(pa);
	MC_LOG("pa(%lx) = 0x%x\n", pa, readback);
}
3374 
/*
 * mc_ldphysio -- load a 32-bit value from the given physical I/O address,
 * log it, and return it to the caller.
 */
uint32_t
mc_ldphysio(uint64_t pa)
{
	uint32_t value = ldphysio(pa);

	MC_LOG("pa(%lx) = 0x%x\n", pa, value);
	return (value);
}
3384 
3385 #define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
3386 
3387 /*
3388  * parse_unum_memory -- extract the board number and the DIMM name from
3389  * the unum.
3390  *
3391  * Return 0 for success and non-zero for a failure.
3392  */
3393 int
3394 parse_unum_memory(char *unum, int *board, char *dname)
3395 {
3396 	char *c;
3397 	char x, y, z;
3398 
3399 	if ((c = strstr(unum, "CMU")) != NULL) {
3400 		/* DC Model */
3401 		c += 3;
3402 		*board = (uint8_t)stoi(&c);
3403 		if ((c = strstr(c, "MEM")) == NULL) {
3404 			return (1);
3405 		}
3406 		c += 3;
3407 		if (strlen(c) < 3) {
3408 			return (2);
3409 		}
3410 		if ((!isdigit(c[0])) || (!(isdigit(c[1]))) ||
3411 		    ((c[2] != 'A') && (c[2] != 'B'))) {
3412 			return (3);
3413 		}
3414 		x = c[0];
3415 		y = c[1];
3416 		z = c[2];
3417 	} else if ((c = strstr(unum, "MBU_")) != NULL) {
3418 		/*  FF1/FF2/Ikkaku Model */
3419 		c += 4;
3420 		if ((c[0] != 'A') && (c[0] != 'B')) {
3421 			return (4);
3422 		}
3423 		if (plat_model == MODEL_IKKAKU) {
3424 			/* Ikkaku Model */
3425 			x = '0';
3426 			*board = 0;
3427 		} else {
3428 			/* FF1/FF2 Model */
3429 			if ((c = strstr(c, "MEMB")) == NULL) {
3430 				return (5);
3431 			}
3432 			c += 4;
3433 
3434 			x = c[0];
3435 			*board =  ((uint8_t)stoi(&c)) / 4;
3436 		}
3437 
3438 		if ((c = strstr(c, "MEM")) == NULL) {
3439 			return (6);
3440 		}
3441 		c += 3;
3442 		if (strlen(c) < 2) {
3443 			return (7);
3444 		}
3445 		if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) {
3446 			return (8);
3447 		}
3448 		y = c[0];
3449 		z = c[1];
3450 	} else {
3451 		return (9);
3452 	}
3453 	if (*board < 0) {
3454 		return (10);
3455 	}
3456 	dname[0] = x;
3457 	dname[1] = y;
3458 	dname[2] = z;
3459 	dname[3] = '\0';
3460 	return (0);
3461 }
3462 
3463 /*
3464  * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and
3465  * the DIMM name.
3466  */
3467 int
3468 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
3469     int buflen, int *lenp)
3470 {
3471 	int		ret = ENODEV;
3472 	mc_dimm_info_t	*d = NULL;
3473 
3474 	if ((d = mcp->mc_dimm_list) == NULL)
3475 		return (ENOTSUP);
3476 
3477 	for (; d != NULL; d = d->md_next) {
3478 		if (strcmp(d->md_dimmname, dname) == 0) {
3479 			break;
3480 		}
3481 	}
3482 	if (d != NULL) {
3483 		*lenp = strlen(d->md_serial) + strlen(d->md_partnum);
3484 		if (buflen <=  *lenp) {
3485 			cmn_err(CE_WARN, "mc_get_mem_sid_dimm: "
3486 			    "buflen is smaller than %d\n", *lenp);
3487 			ret = ENOSPC;
3488 		} else {
3489 			snprintf(buf, buflen, "%s:%s",
3490 			    d->md_serial, d->md_partnum);
3491 			ret = 0;
3492 		}
3493 	}
3494 	MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n",
3495 	    ret, dname, (ret == 0) ? buf : "");
3496 	return (ret);
3497 }
3498 
3499 int
3500 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb,
3501     int bank, uint32_t mf_type, uint32_t d_slot)
3502 {
3503 	int	lenp = buflen;
3504 	int	id;
3505 	int	ret;
3506 	char	*dimmnm;
3507 
3508 	if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
3509 	    mf_type == FLT_TYPE_PERMANENT_CE) {
3510 		if (plat_model == MODEL_DC) {
3511 			/*
3512 			 * All DC models
3513 			 */
3514 			id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
3515 			dimmnm = mc_dc_dimm_unum_table[id];
3516 		} else {
3517 			/*
3518 			 * All FF and Ikkaku models
3519 			 */
3520 			id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
3521 			dimmnm = mc_ff_dimm_unum_table[id];
3522 		}
3523 		if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen,
3524 		    &lenp)) != 0) {
3525 			return (ret);
3526 		}
3527 	} else {
3528 		return (1);
3529 	}
3530 
3531 	return (0);
3532 }
3533 
3534 /*
3535  * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum.
3536  */
3537 int
3538 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3539 {
3540 	int	i;
3541 	int	ret = ENODEV;
3542 	int	board;
3543 	char	dname[MCOPL_MAX_DIMMNAME + 1];
3544 	mc_opl_t *mcp;
3545 
3546 	MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen);
3547 	if ((ret = parse_unum_memory(unum, &board, dname)) != 0) {
3548 		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3549 		    unum, ret);
3550 		return (EINVAL);
3551 	}
3552 
3553 	if (board < 0) {
3554 		MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n",
3555 		    board, dname);
3556 		return (EINVAL);
3557 	}
3558 
3559 	mutex_enter(&mcmutex);
3560 	/*
3561 	 * return ENOENT if we can not find the matching board.
3562 	 */
3563 	ret = ENOENT;
3564 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3565 		if ((mcp = mc_instances[i]) == NULL)
3566 			continue;
3567 		mutex_enter(&mcp->mc_lock);
3568 		if (mcp->mc_phys_board_num != board) {
3569 			mutex_exit(&mcp->mc_lock);
3570 			continue;
3571 		}
3572 		ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp);
3573 		if (ret == 0) {
3574 			mutex_exit(&mcp->mc_lock);
3575 			break;
3576 		}
3577 		mutex_exit(&mcp->mc_lock);
3578 	}
3579 	mutex_exit(&mcmutex);
3580 	return (ret);
3581 }
3582 
3583 /*
3584  * mc_get_mem_offset -- get the offset in a DIMM for a given physical address.
3585  */
3586 int
3587 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
3588 {
3589 	int		i;
3590 	int		ret = ENODEV;
3591 	mc_addr_t	maddr;
3592 	mc_opl_t	*mcp;
3593 
3594 	mutex_enter(&mcmutex);
3595 	for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) {
3596 		if ((mcp = mc_instances[i]) == NULL)
3597 			continue;
3598 		mutex_enter(&mcp->mc_lock);
3599 		if (!pa_is_valid(mcp, paddr)) {
3600 			mutex_exit(&mcp->mc_lock);
3601 			continue;
3602 		}
3603 		if (pa_to_maddr(mcp, paddr, &maddr) == 0) {
3604 			*offp = maddr.ma_dimm_addr;
3605 			ret = 0;
3606 		}
3607 		mutex_exit(&mcp->mc_lock);
3608 	}
3609 	mutex_exit(&mcmutex);
3610 	MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n",
3611 	    ret, paddr, *offp);
3612 	return (ret);
3613 }
3614 
3615 /*
3616  * dname_to_bankslot - Get the bank and slot number from the DIMM name.
3617  */
3618 int
3619 dname_to_bankslot(char *dname, int *bank, int *slot)
3620 {
3621 	int i;
3622 	int tsz;
3623 	char **tbl;
3624 
3625 	if (plat_model == MODEL_DC) {
3626 		/*
3627 		 * All DC models
3628 		 */
3629 		tbl = mc_dc_dimm_unum_table;
3630 		tsz = OPL_MAX_DIMMS;
3631 	} else {
3632 		/*
3633 		 * All FF and Ikkaku models
3634 		 */
3635 		tbl = mc_ff_dimm_unum_table;
3636 		tsz = 2 * OPL_MAX_DIMMS;
3637 	}
3638 
3639 	for (i = 0; i < tsz; i++) {
3640 		if (strcmp(dname,  tbl[i]) == 0) {
3641 			break;
3642 		}
3643 	}
3644 	if (i == tsz) {
3645 		return (1);
3646 	}
3647 	*bank = INDEX_TO_BANK(i);
3648 	*slot = INDEX_TO_SLOT(i);
3649 	return (0);
3650 }
3651 
3652 /*
3653  * mc_get_mem_addr -- get the physical address of a DIMM corresponding
3654  * to the unum and sid.
3655  */
3656 int
3657 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr)
3658 {
3659 	int	board;
3660 	int	bank;
3661 	int	slot;
3662 	int	i;
3663 	int	ret = ENODEV;
3664 	char	dname[MCOPL_MAX_DIMMNAME + 1];
3665 	mc_addr_t maddr;
3666 	mc_opl_t *mcp;
3667 
3668 	MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n",
3669 	    unum, sid, offset);
3670 	if (parse_unum_memory(unum, &board, dname) != 0) {
3671 		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3672 		    unum, ret);
3673 		return (EINVAL);
3674 	}
3675 
3676 	if (board < 0) {
3677 		MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n",
3678 		    board, dname);
3679 		return (EINVAL);
3680 	}
3681 
3682 	mutex_enter(&mcmutex);
3683 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3684 		if ((mcp = mc_instances[i]) == NULL)
3685 			continue;
3686 		mutex_enter(&mcp->mc_lock);
3687 		if (mcp->mc_phys_board_num != board) {
3688 			mutex_exit(&mcp->mc_lock);
3689 			continue;
3690 		}
3691 
3692 		ret = dname_to_bankslot(dname, &bank, &slot);
3693 		MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot);
3694 		if (ret != 0) {
3695 			MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n");
3696 			ret = ENODEV;
3697 		} else {
3698 			maddr.ma_bd = mcp->mc_board_num;
3699 			maddr.ma_bank =  bank;
3700 			maddr.ma_dimm_addr = offset;
3701 			ret = mcaddr_to_pa(mcp, &maddr, paddr);
3702 			if (ret != 0) {
3703 				MC_LOG("mc_get_mem_addr: "
3704 				    "mcaddr_to_pa failed\n");
3705 				ret = ENODEV;
3706 			}
3707 			mutex_exit(&mcp->mc_lock);
3708 			break;
3709 		}
3710 		mutex_exit(&mcp->mc_lock);
3711 	}
3712 	mutex_exit(&mcmutex);
3713 	MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr);
3714 	return (ret);
3715 }
3716 
3717 static void
3718 mc_free_dimm_list(mc_dimm_info_t *d)
3719 {
3720 	mc_dimm_info_t *next;
3721 
3722 	while (d != NULL) {
3723 		next = d->md_next;
3724 		kmem_free(d, sizeof (mc_dimm_info_t));
3725 		d = next;
3726 	}
3727 }
3728 
3729 /*
3730  * mc_get_dimm_list -- get the list of dimms with serial-id info
3731  * from the SP.
3732  */
3733 mc_dimm_info_t *
3734 mc_get_dimm_list(mc_opl_t *mcp)
3735 {
3736 	uint32_t	bufsz;
3737 	uint32_t	maxbufsz;
3738 	int		ret;
3739 	int		sexp;
3740 	board_dimm_info_t *bd_dimmp;
3741 	mc_dimm_info_t	*dimm_list = NULL;
3742 
3743 	maxbufsz = bufsz = sizeof (board_dimm_info_t) +
3744 	    ((MCOPL_MAX_DIMMNAME +  MCOPL_MAX_SERIAL +
3745 	    MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS);
3746 
3747 	bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP);
3748 	ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz);
3749 
3750 	MC_LOG("mc_get_dimm_list:  scf_service_getinfo returned=%d\n", ret);
3751 	if (ret == 0) {
3752 		sexp = sizeof (board_dimm_info_t) +
3753 		    ((bd_dimmp->bd_dnamesz +  bd_dimmp->bd_serialsz +
3754 		    bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms);
3755 
3756 		if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) &&
3757 		    (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) &&
3758 		    (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) &&
3759 		    (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) &&
3760 		    (sexp <= bufsz)) {
3761 
3762 #ifdef DEBUG
3763 			if (oplmc_debug)
3764 				mc_dump_dimm_info(bd_dimmp);
3765 #endif
3766 			dimm_list = mc_prepare_dimmlist(bd_dimmp);
3767 
3768 		} else {
3769 			cmn_err(CE_WARN, "DIMM info version mismatch\n");
3770 		}
3771 	}
3772 	kmem_free(bd_dimmp, maxbufsz);
3773 	MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", dimm_list);
3774 	return (dimm_list);
3775 }
3776 
3777 /*
3778  * mc_prepare_dimmlist - Prepare the dimm list from the information
3779  * received from the SP.
3780  */
3781 mc_dimm_info_t *
3782 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp)
3783 {
3784 	char	*dimm_name;
3785 	char	*serial;
3786 	char	*part;
3787 	int	dimm;
3788 	int	dnamesz = bd_dimmp->bd_dnamesz;
3789 	int	sersz = bd_dimmp->bd_serialsz;
3790 	int	partsz = bd_dimmp->bd_partnumsz;
3791 	mc_dimm_info_t	*dimm_list = NULL;
3792 	mc_dimm_info_t	*d;
3793 
3794 	dimm_name = (char *)(bd_dimmp + 1);
3795 	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
3796 
3797 		d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t),
3798 		    KM_SLEEP);
3799 
3800 		bcopy(dimm_name, d->md_dimmname, dnamesz);
3801 		d->md_dimmname[dnamesz] = 0;
3802 
3803 		serial = dimm_name + dnamesz;
3804 		bcopy(serial, d->md_serial, sersz);
3805 		d->md_serial[sersz] = 0;
3806 
3807 		part = serial + sersz;
3808 		bcopy(part, d->md_partnum, partsz);
3809 		d->md_partnum[partsz] = 0;
3810 
3811 		d->md_next = dimm_list;
3812 		dimm_list = d;
3813 		dimm_name = part + partsz;
3814 	}
3815 	return (dimm_list);
3816 }
3817 
3818 #ifdef DEBUG
3819 void
3820 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz)
3821 {
3822 	char dname[MCOPL_MAX_DIMMNAME + 1];
3823 	char serial[MCOPL_MAX_SERIAL + 1];
3824 	char part[ MCOPL_MAX_PARTNUM + 1];
3825 	char *b;
3826 
3827 	b = buf;
3828 	bcopy(b, dname, dnamesz);
3829 	dname[dnamesz] = 0;
3830 
3831 	b += dnamesz;
3832 	bcopy(b, serial, serialsz);
3833 	serial[serialsz] = 0;
3834 
3835 	b += serialsz;
3836 	bcopy(b, part, partnumsz);
3837 	part[partnumsz] = 0;
3838 
3839 	printf("DIMM=%s  Serial=%s PartNum=%s\n", dname, serial, part);
3840 }
3841 
3842 void
3843 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp)
3844 {
3845 	int	dimm;
3846 	int	dnamesz = bd_dimmp->bd_dnamesz;
3847 	int	sersz = bd_dimmp->bd_serialsz;
3848 	int	partsz = bd_dimmp->bd_partnumsz;
3849 	char	*buf;
3850 
3851 	printf("Version=%d Board=%02d DIMMs=%d NameSize=%d "
3852 	    "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version,
3853 	    bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz,
3854 	    bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz);
3855 	printf("======================================================\n");
3856 
3857 	buf = (char *)(bd_dimmp + 1);
3858 	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
3859 		mc_dump_dimm(buf, dnamesz, sersz, partsz);
3860 		buf += dnamesz + sersz + partsz;
3861 	}
3862 	printf("======================================================\n");
3863 }
3864 
3865 
3866 /* ARGSUSED */
3867 static int
3868 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
3869 	int *rvalp)
3870 {
3871 	caddr_t	buf;
3872 	uint64_t pa;
3873 	int rv = 0;
3874 	int i;
3875 	uint32_t flags;
3876 	static uint32_t offset = 0;
3877 
3878 
3879 	flags = (cmd >> 4) & 0xfffffff;
3880 
3881 	cmd &= 0xf;
3882 
3883 	MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags);
3884 
3885 	if (arg != NULL) {
3886 		if (ddi_copyin((const void *)arg, (void *)&pa,
3887 		    sizeof (uint64_t), 0) < 0) {
3888 			rv = EFAULT;
3889 			return (rv);
3890 		}
3891 		buf = NULL;
3892 	} else {
3893 		buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP);
3894 
3895 		pa = va_to_pa(buf);
3896 		pa += offset;
3897 
3898 		offset += 64;
3899 		if (offset >= PAGESIZE)
3900 			offset = 0;
3901 	}
3902 
3903 	switch (cmd) {
3904 	case MCI_CE:
3905 		mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags);
3906 		break;
3907 	case MCI_PERM_CE:
3908 		mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags);
3909 		break;
3910 	case MCI_UE:
3911 		mc_inject_error(MC_INJECT_UE, pa, flags);
3912 		break;
3913 	case MCI_M_CE:
3914 		mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags);
3915 		break;
3916 	case MCI_M_PCE:
3917 		mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags);
3918 		break;
3919 	case MCI_M_UE:
3920 		mc_inject_error(MC_INJECT_MUE, pa, flags);
3921 		break;
3922 	case MCI_CMP:
3923 		mc_inject_error(MC_INJECT_CMPE, pa, flags);
3924 		break;
3925 	case MCI_NOP:
3926 		mc_inject_error(MC_INJECT_NOP, pa, flags); break;
3927 	case MCI_SHOW_ALL:
3928 		mc_debug_show_all = 1;
3929 		break;
3930 	case MCI_SHOW_NONE:
3931 		mc_debug_show_all = 0;
3932 		break;
3933 	case MCI_ALLOC:
3934 		/*
3935 		 * just allocate some kernel memory and never free it
3936 		 * 512 MB seems to be the maximum size supported.
3937 		 */
3938 		cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512);
3939 		for (i = 0; i < flags; i++) {
3940 			buf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP);
3941 			cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n",
3942 			    (u_longlong_t)buf, (u_longlong_t)va_to_pa(buf));
3943 		}
3944 		break;
3945 	case MCI_SUSPEND:
3946 		(void) opl_mc_suspend();
3947 		break;
3948 	case MCI_RESUME:
3949 		(void) opl_mc_resume();
3950 		break;
3951 	default:
3952 		rv = ENXIO;
3953 	}
3954 	return (rv);
3955 }
3956 
3957 #endif /* DEBUG */
3958