xref: /illumos-gate/usr/src/uts/sun4u/opl/io/mc-opl.c (revision d0698e0d179f97729cacdbc2f13446a6b0a3f22a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2008
26  */
27 
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/conf.h>
31 #include <sys/modctl.h>
32 #include <sys/stat.h>
33 #include <sys/async.h>
34 #include <sys/machcpuvar.h>
35 #include <sys/machsystm.h>
36 #include <sys/promif.h>
37 #include <sys/ksynch.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ddifm.h>
42 #include <sys/fm/protocol.h>
43 #include <sys/fm/util.h>
44 #include <sys/kmem.h>
45 #include <sys/fm/io/opl_mc_fm.h>
46 #include <sys/memlist.h>
47 #include <sys/param.h>
48 #include <sys/disp.h>
49 #include <vm/page.h>
50 #include <sys/mc-opl.h>
51 #include <sys/opl.h>
52 #include <sys/opl_dimm.h>
53 #include <sys/scfd/scfostoescf.h>
54 #include <sys/cpu_module.h>
55 #include <vm/seg_kmem.h>
56 #include <sys/vmem.h>
57 #include <vm/hat_sfmmu.h>
58 #include <sys/vmsystm.h>
59 #include <sys/membar.h>
60 #include <sys/mem.h>
61 
62 /*
63  * Function prototypes
64  */
65 static int mc_open(dev_t *, int, int, cred_t *);
66 static int mc_close(dev_t, int, int, cred_t *);
67 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
68 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
69 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
70 
71 static int mc_poll_init(void);
72 static void mc_poll_fini(void);
73 static int mc_board_add(mc_opl_t *mcp);
74 static int mc_board_del(mc_opl_t *mcp);
75 static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
76 static int mc_resume(mc_opl_t *mcp, uint32_t flag);
77 int opl_mc_suspend(void);
78 int opl_mc_resume(void);
79 
80 static void insert_mcp(mc_opl_t *mcp);
81 static void delete_mcp(mc_opl_t *mcp);
82 
83 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);
84 
85 static int mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa);
86 
87 int mc_get_mem_unum(int, uint64_t, char *, int, int *);
88 int mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr);
89 int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
90 int mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp);
91 int mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
92     int buflen, int *lenp);
93 mc_dimm_info_t *mc_get_dimm_list(mc_opl_t *mcp);
94 mc_dimm_info_t *mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp);
95 int mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int lsb, int bank,
96     uint32_t mf_type, uint32_t d_slot);
97 static void mc_free_dimm_list(mc_dimm_info_t *d);
98 static void mc_get_mlist(mc_opl_t *);
99 static void mc_polling(void);
100 static int mc_opl_get_physical_board(int);
101 
102 static void mc_clear_rewrite(mc_opl_t *mcp, int i);
103 static void mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state);
104 static int mc_scf_log_event(mc_flt_page_t *flt_pag);
105 
106 #ifdef	DEBUG
107 static int mc_ioctl_debug(dev_t, int, intptr_t, int, cred_t *, int *);
108 void mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz);
109 void mc_dump_dimm_info(board_dimm_info_t *bd_dimmp);
110 #endif
111 
112 #pragma weak opl_get_physical_board
113 extern int opl_get_physical_board(int);
114 extern int plat_max_boards(void);
115 
116 /*
117  * Configuration data structures
118  */
119 static struct cb_ops mc_cb_ops = {
120 	mc_open,			/* open */
121 	mc_close,			/* close */
122 	nulldev,			/* strategy */
123 	nulldev,			/* print */
124 	nodev,				/* dump */
125 	nulldev,			/* read */
126 	nulldev,			/* write */
127 	mc_ioctl,			/* ioctl */
128 	nodev,				/* devmap */
129 	nodev,				/* mmap */
130 	nodev,				/* segmap */
131 	nochpoll,			/* poll */
132 	ddi_prop_op,			/* cb_prop_op */
133 	0,				/* streamtab */
134 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
135 	CB_REV,				/* rev */
136 	nodev,				/* cb_aread */
137 	nodev				/* cb_awrite */
138 };
139 
140 static struct dev_ops mc_ops = {
141 	DEVO_REV,			/* rev */
142 	0,				/* refcnt  */
143 	ddi_getinfo_1to1,		/* getinfo */
144 	nulldev,			/* identify */
145 	nulldev,			/* probe */
146 	mc_attach,			/* attach */
147 	mc_detach,			/* detach */
148 	nulldev,			/* reset */
149 	&mc_cb_ops,			/* cb_ops */
150 	(struct bus_ops *)0,		/* bus_ops */
151 	nulldev,			/* power */
152 	ddi_quiesce_not_needed,			/* quiesce */
153 };
154 
/*
 * Driver globals
 */

/*
 * Platform model the driver is running on.  _init() overrides this from
 * the PROM "model" property; when the property is absent or not
 * recognized the driver behaves as on DC.
 */
static enum {
	MODEL_FF1,
	MODEL_FF2,
	MODEL_DC,
	MODEL_IKKAKU
} plat_model = MODEL_DC;

/*
 * Name fragments used when composing unum strings.  The initializers are
 * positional and the table is indexed by plat_model, so the entry order
 * must track the enumeration above.
 */
static struct plat_model_names {
	const char *unit_name;
	const char *mem_name;
} model_names[] = {
	{ "MBU_A", "MEMB" },	/* MODEL_FF1 */
	{ "MBU_B", "MEMB" },	/* MODEL_FF2 */
	{ "CMU", "" },		/* MODEL_DC */
	{ "MBU_A", "" }		/* MODEL_IKKAKU */
};
175 
176 /*
177  * The DIMM Names for DC platform.
178  * The index into this table is made up of (bank, dslot),
179  * Where dslot occupies bits 0-1 and bank occupies 2-4.
180  */
181 static char *mc_dc_dimm_unum_table[OPL_MAX_DIMMS] = {
182 	/* --------CMUnn----------- */
183 	/* --CS0-----|--CS1------ */
184 	/* -H-|--L-- | -H- | -L-- */
185 	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
186 	"13A", "12A", "13B", "12B", /* Bank 1 (MAC 0 bank 1) */
187 	"23A", "22A", "23B", "22B", /* Bank 2 (MAC 1 bank 0) */
188 	"33A", "32A", "33B", "32B", /* Bank 3 (MAC 1 bank 1) */
189 	"01A", "00A", "01B", "00B", /* Bank 4 (MAC 2 bank 0) */
190 	"11A", "10A", "11B", "10B", /* Bank 5 (MAC 2 bank 1) */
191 	"21A", "20A", "21B", "20B", /* Bank 6 (MAC 3 bank 0) */
192 	"31A", "30A", "31B", "30B"  /* Bank 7 (MAC 3 bank 1) */
193 };
194 
195 /*
196  * The DIMM Names for FF1/FF2/IKKAKU platforms.
197  * The index into this table is made up of (board, bank, dslot),
198  * Where dslot occupies bits 0-1, bank occupies 2-4 and
199  * board occupies the bit 5.
200  */
201 static char *mc_ff_dimm_unum_table[2 * OPL_MAX_DIMMS] = {
202 	/* --------CMU0---------- */
203 	/* --CS0-----|--CS1------ */
204 	/* -H-|--L-- | -H- | -L-- */
205 	"03A", "02A", "03B", "02B", /* Bank 0 (MAC 0 bank 0) */
206 	"01A", "00A", "01B", "00B", /* Bank 1 (MAC 0 bank 1) */
207 	"13A", "12A", "13B", "12B", /* Bank 2 (MAC 1 bank 0) */
208 	"11A", "10A", "11B", "10B", /* Bank 3 (MAC 1 bank 1) */
209 	"23A", "22A", "23B", "22B", /* Bank 4 (MAC 2 bank 0) */
210 	"21A", "20A", "21B", "20B", /* Bank 5 (MAC 2 bank 1) */
211 	"33A", "32A", "33B", "32B", /* Bank 6 (MAC 3 bank 0) */
212 	"31A", "30A", "31B", "30B", /* Bank 7 (MAC 3 bank 1) */
213 	/* --------CMU1---------- */
214 	/* --CS0-----|--CS1------ */
215 	/* -H-|--L-- | -H- | -L-- */
216 	"43A", "42A", "43B", "42B", /* Bank 0 (MAC 0 bank 0) */
217 	"41A", "40A", "41B", "40B", /* Bank 1 (MAC 0 bank 1) */
218 	"53A", "52A", "53B", "52B", /* Bank 2 (MAC 1 bank 0) */
219 	"51A", "50A", "51B", "50B", /* Bank 3 (MAC 1 bank 1) */
220 	"63A", "62A", "63B", "62B", /* Bank 4 (MAC 2 bank 0) */
221 	"61A", "60A", "61B", "60B", /* Bank 5 (MAC 2 bank 1) */
222 	"73A", "72A", "73B", "72B", /* Bank 6 (MAC 3 bank 0) */
223 	"71A", "70A", "71B", "70B"  /* Bank 7 (MAC 3 bank 1) */
224 };
225 
/*
 * Helpers for the DIMM name tables.
 *
 * BD_BK_SLOT_TO_INDEX packs (board, bank, slot) into a table index:
 * slot in bits 0-1, bank in bits 2-4, board in bit 5.  INDEX_TO_BANK and
 * INDEX_TO_SLOT extract the bank and slot fields again, and SLOT_TO_CS
 * yields the chip-select (bit 1 of the slot number).
 *
 * Every macro argument is parenthesized so that an expression argument
 * (e.g. "a | b") is masked as a whole rather than being split by operator
 * precedence.
 */
#define	BD_BK_SLOT_TO_INDEX(bd, bk, s)			\
	((((bd) & 0x01) << 5) | (((bk) & 0x07) << 2) | ((s) & 0x03))

#define	INDEX_TO_BANK(i)			(((i) & 0x1C) >> 2)
#define	INDEX_TO_SLOT(i)			((i) & 0x03)

#define	SLOT_TO_CS(slot)	(((slot) & 0x3) >> 1)

/* Isolation unit size is 64 MB */
#define	MC_ISOLATION_BSIZE	(64 * 1024 * 1024)
236 
/* Number of entries in mc_scan_speeds[]. */
#define	MC_MAX_SPEEDS 7

/*
 * One scan-speed setting: mc_speeds is the speed code already shifted
 * into position (see MC_CNTL_SPEED_SHIFT) and mc_period is the value
 * associated with that code in the table below.
 */
typedef struct {
	uint32_t mc_speeds;
	uint32_t mc_period;
} mc_scan_speed_t;

/* Bit position of the speed code within the 32-bit mc_speeds value. */
#define	MC_CNTL_SPEED_SHIFT 26

/*
 * In mirror mode, we normalize the bank index to "even" since
 * the HW treats the pair as one unit w.r.t. programming.
 * This bank index will be the "effective" bank index.
 * All mirrored bank state info on mc_period, mc_speedup_period
 * will be stored in the even bank structure to avoid code duplication.
 *
 * The argument is parenthesized so an expression argument is masked as
 * a whole.
 */
#define	MIRROR_IDX(bankidx)	((bankidx) & ~1)

/* Speed codes run from 0x6 (period 0) down to 0x0 (period 1024). */
static mc_scan_speed_t	mc_scan_speeds[MC_MAX_SPEEDS] = {
	{0x6 << MC_CNTL_SPEED_SHIFT, 0},
	{0x5 << MC_CNTL_SPEED_SHIFT, 32},
	{0x4 << MC_CNTL_SPEED_SHIFT, 64},
	{0x3 << MC_CNTL_SPEED_SHIFT, 128},
	{0x2 << MC_CNTL_SPEED_SHIFT, 256},
	{0x1 << MC_CNTL_SPEED_SHIFT, 512},
	{0x0 << MC_CNTL_SPEED_SHIFT, 1024}
};

/* Same encoding as the first (period 0) entry of mc_scan_speeds[]. */
static uint32_t	mc_max_speed = (0x6 << MC_CNTL_SPEED_SHIFT);
266 
267 int mc_isolation_bsize = MC_ISOLATION_BSIZE;
268 int mc_patrol_interval_sec = MC_PATROL_INTERVAL_SEC;
269 int mc_max_scf_retry = 16;
270 int mc_max_scf_logs = 64;
271 int mc_max_errlog_processed = BANKNUM_PER_SB*2;
272 int mc_scan_period = 12 * 60 * 60;	/* 12 hours period */
273 int mc_max_rewrite_loop = 100;
274 int mc_rewrite_delay = 10;
275 /*
276  * it takes SCF about 300 m.s. to process a requst.  We can bail out
277  * if it is busy.  It does not pay to wait for it too long.
278  */
279 int mc_max_scf_loop = 2;
280 int mc_scf_delay = 100;
281 int mc_pce_dropped = 0;
282 int mc_poll_priority = MINCLSYSPRI;
283 int mc_max_rewrite_retry = 6 * 60;
284 
285 
286 /*
287  * Mutex hierarchy in mc-opl
288  * If both mcmutex and mc_lock must be held,
289  * mcmutex must be acquired first, and then mc_lock.
290  */
291 
292 static kmutex_t mcmutex;
293 mc_opl_t *mc_instances[OPL_MAX_BOARDS];
294 
295 static kmutex_t mc_polling_lock;
296 static kcondvar_t mc_polling_cv;
297 static kcondvar_t mc_poll_exit_cv;
298 static int mc_poll_cmd = 0;
299 static int mc_pollthr_running = 0;
300 int mc_timeout_period = 0; /* this is in m.s. */
301 void *mc_statep;
302 
303 #ifdef	DEBUG
304 int oplmc_debug = 0;
305 #endif
306 
307 static int mc_debug_show_all = 0;
308 
309 extern struct mod_ops mod_driverops;
310 
311 static struct modldrv modldrv = {
312 	&mod_driverops,			/* module type, this one is a driver */
313 	"OPL Memory-controller",	/* module name */
314 	&mc_ops,			/* driver ops */
315 };
316 
317 static struct modlinkage modlinkage = {
318 	MODREV_1,		/* rev */
319 	(void *)&modldrv,
320 	NULL
321 };
322 
323 #pragma weak opl_get_mem_unum
324 #pragma weak opl_get_mem_sid
325 #pragma weak opl_get_mem_offset
326 #pragma weak opl_get_mem_addr
327 
328 extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
329 extern int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
330 extern int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
331 extern int (*opl_get_mem_addr)(char *unum, char *sid, uint64_t offset,
332     uint64_t *paddr);
333 
334 
335 /*
336  * pseudo-mc node portid format
337  *
338  *		[10]   = 0
339  *		[9]    = 1
340  *		[8]    = LSB_ID[4] = 0
341  *		[7:4]  = LSB_ID[3:0]
342  *		[3:0]  = 0
343  *
344  */
345 
346 /*
347  * These are the module initialization routines.
348  */
349 int
350 _init(void)
351 {
352 	int	error;
353 	int	plen;
354 	char	model[20];
355 	pnode_t	node;
356 
357 
358 	if ((error = ddi_soft_state_init(&mc_statep,
359 	    sizeof (mc_opl_t), 1)) != 0)
360 		return (error);
361 
362 	if ((error = mc_poll_init()) != 0) {
363 		ddi_soft_state_fini(&mc_statep);
364 		return (error);
365 	}
366 
367 	mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
368 	if (&opl_get_mem_unum)
369 		opl_get_mem_unum = mc_get_mem_unum;
370 	if (&opl_get_mem_sid)
371 		opl_get_mem_sid = mc_get_mem_sid;
372 	if (&opl_get_mem_offset)
373 		opl_get_mem_offset = mc_get_mem_offset;
374 	if (&opl_get_mem_addr)
375 		opl_get_mem_addr = mc_get_mem_addr;
376 
377 	node = prom_rootnode();
378 	plen = prom_getproplen(node, "model");
379 
380 	if (plen > 0 && plen < sizeof (model)) {
381 		(void) prom_getprop(node, "model", model);
382 		model[plen] = '\0';
383 		if (strcmp(model, "FF1") == 0)
384 			plat_model = MODEL_FF1;
385 		else if (strcmp(model, "FF2") == 0)
386 			plat_model = MODEL_FF2;
387 		else if (strncmp(model, "DC", 2) == 0)
388 			plat_model = MODEL_DC;
389 		else if (strcmp(model, "IKKAKU") == 0)
390 			plat_model = MODEL_IKKAKU;
391 	}
392 
393 	error =  mod_install(&modlinkage);
394 	if (error != 0) {
395 		if (&opl_get_mem_unum)
396 			opl_get_mem_unum = NULL;
397 		if (&opl_get_mem_sid)
398 			opl_get_mem_sid = NULL;
399 		if (&opl_get_mem_offset)
400 			opl_get_mem_offset = NULL;
401 		if (&opl_get_mem_addr)
402 			opl_get_mem_addr = NULL;
403 		mutex_destroy(&mcmutex);
404 		mc_poll_fini();
405 		ddi_soft_state_fini(&mc_statep);
406 	}
407 	return (error);
408 }
409 
410 int
411 _fini(void)
412 {
413 	int error;
414 
415 	if ((error = mod_remove(&modlinkage)) != 0)
416 		return (error);
417 
418 	if (&opl_get_mem_unum)
419 		opl_get_mem_unum = NULL;
420 	if (&opl_get_mem_sid)
421 		opl_get_mem_sid = NULL;
422 	if (&opl_get_mem_offset)
423 		opl_get_mem_offset = NULL;
424 	if (&opl_get_mem_addr)
425 		opl_get_mem_addr = NULL;
426 
427 	mutex_destroy(&mcmutex);
428 	mc_poll_fini();
429 	ddi_soft_state_fini(&mc_statep);
430 
431 	return (0);
432 }
433 
434 int
435 _info(struct modinfo *modinfop)
436 {
437 	return (mod_info(&modlinkage, modinfop));
438 }
439 
440 static void
441 mc_polling_thread()
442 {
443 	mutex_enter(&mc_polling_lock);
444 	mc_pollthr_running = 1;
445 	while (!(mc_poll_cmd & MC_POLL_EXIT)) {
446 		mc_polling();
447 		(void) cv_reltimedwait(&mc_polling_cv, &mc_polling_lock,
448 		    mc_timeout_period, TR_CLOCK_TICK);
449 	}
450 	mc_pollthr_running = 0;
451 
452 	/*
453 	 * signal if any one is waiting for this thread to exit.
454 	 */
455 	cv_signal(&mc_poll_exit_cv);
456 	mutex_exit(&mc_polling_lock);
457 	thread_exit();
458 	/* NOTREACHED */
459 }
460 
461 static int
462 mc_poll_init()
463 {
464 	mutex_init(&mc_polling_lock, NULL, MUTEX_DRIVER, NULL);
465 	cv_init(&mc_polling_cv, NULL, CV_DRIVER, NULL);
466 	cv_init(&mc_poll_exit_cv, NULL, CV_DRIVER, NULL);
467 	return (0);
468 }
469 
470 static void
471 mc_poll_fini()
472 {
473 	mutex_enter(&mc_polling_lock);
474 	if (mc_pollthr_running) {
475 		mc_poll_cmd = MC_POLL_EXIT;
476 		cv_signal(&mc_polling_cv);
477 		while (mc_pollthr_running) {
478 			cv_wait(&mc_poll_exit_cv, &mc_polling_lock);
479 		}
480 	}
481 	mutex_exit(&mc_polling_lock);
482 	mutex_destroy(&mc_polling_lock);
483 	cv_destroy(&mc_polling_cv);
484 	cv_destroy(&mc_poll_exit_cv);
485 }
486 
487 static int
488 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
489 {
490 	mc_opl_t *mcp;
491 	int instance;
492 	int rv;
493 
494 	/* get the instance of this devi */
495 	instance = ddi_get_instance(devi);
496 
497 	switch (cmd) {
498 	case DDI_ATTACH:
499 		break;
500 	case DDI_RESUME:
501 		mcp = ddi_get_soft_state(mc_statep, instance);
502 		rv = mc_resume(mcp, MC_DRIVER_SUSPENDED);
503 		return (rv);
504 	default:
505 		return (DDI_FAILURE);
506 	}
507 
508 	if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS)
509 		return (DDI_FAILURE);
510 
511 	if (ddi_create_minor_node(devi, "mc-opl", S_IFCHR, instance,
512 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
513 		MC_LOG("mc_attach: create_minor_node failed\n");
514 		return (DDI_FAILURE);
515 	}
516 
517 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
518 		goto bad;
519 	}
520 
521 	if (mc_timeout_period == 0) {
522 		mc_patrol_interval_sec = (int)ddi_getprop(DDI_DEV_T_ANY, devi,
523 		    DDI_PROP_DONTPASS, "mc-timeout-interval-sec",
524 		    mc_patrol_interval_sec);
525 		mc_timeout_period = drv_usectohz(1000000 *
526 		    mc_patrol_interval_sec / OPL_MAX_BOARDS);
527 	}
528 
529 	/* set informations in mc state */
530 	mcp->mc_dip = devi;
531 
532 	if (mc_board_add(mcp))
533 		goto bad;
534 
535 	insert_mcp(mcp);
536 
537 	/*
538 	 * Start the polling thread if it is not running already.
539 	 */
540 	mutex_enter(&mc_polling_lock);
541 	if (!mc_pollthr_running) {
542 		(void) thread_create(NULL, 0, (void (*)())mc_polling_thread,
543 		    NULL, 0, &p0, TS_RUN, mc_poll_priority);
544 	}
545 	mutex_exit(&mc_polling_lock);
546 	ddi_report_dev(devi);
547 
548 	return (DDI_SUCCESS);
549 
550 bad:
551 	ddi_remove_minor_node(devi, NULL);
552 	ddi_soft_state_free(mc_statep, instance);
553 	return (DDI_FAILURE);
554 }
555 
556 /* ARGSUSED */
557 static int
558 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
559 {
560 	int rv;
561 	int instance;
562 	mc_opl_t *mcp;
563 
564 	/* get the instance of this devi */
565 	instance = ddi_get_instance(devi);
566 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
567 		return (DDI_FAILURE);
568 	}
569 
570 	switch (cmd) {
571 	case DDI_SUSPEND:
572 		rv = mc_suspend(mcp, MC_DRIVER_SUSPENDED);
573 		return (rv);
574 	case DDI_DETACH:
575 		break;
576 	default:
577 		return (DDI_FAILURE);
578 	}
579 
580 	delete_mcp(mcp);
581 	if (mc_board_del(mcp) != DDI_SUCCESS) {
582 		return (DDI_FAILURE);
583 	}
584 
585 	ddi_remove_minor_node(devi, NULL);
586 
587 	/* free up the soft state */
588 	ddi_soft_state_free(mc_statep, instance);
589 
590 	return (DDI_SUCCESS);
591 }
592 
593 /* ARGSUSED */
594 static int
595 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
596 {
597 	return (0);
598 }
599 
600 /* ARGSUSED */
601 static int
602 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
603 {
604 	return (0);
605 }
606 
607 /* ARGSUSED */
608 static int
609 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
610 	int *rvalp)
611 {
612 	mc_flt_page_t flt_page;
613 
614 	if (cmd == MCIOC_FAULT_PAGE) {
615 		if (arg == NULL)
616 			return (EINVAL);
617 
618 		if (ddi_copyin((const void *)arg, (void *)&flt_page,
619 		    sizeof (mc_flt_page_t), 0) < 0)
620 			return (EFAULT);
621 
622 		return (mc_scf_log_event(&flt_page));
623 	}
624 #ifdef DEBUG
625 	return (mc_ioctl_debug(dev, cmd, arg, mode, credp, rvalp));
626 #else
627 	return (ENOTTY);
628 #endif
629 }
630 
631 /*
632  * PA validity check:
633  * This function return 1 if the PA is a valid PA
634  * in the running Solaris instance i.e. in physinstall
635  * Otherwise, return 0.
636  */
637 
638 /* ARGSUSED */
639 static int
640 pa_is_valid(mc_opl_t *mcp, uint64_t addr)
641 {
642 	if (mcp->mlist == NULL)
643 		mc_get_mlist(mcp);
644 
645 	if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) {
646 		return (1);
647 	}
648 	return (0);
649 }
650 
651 /*
652  * mac-pa translation routines.
653  *
654  *    Input: mc driver state, (LSB#, Bank#, DIMM address)
655  *    Output: physical address
656  *
657  *    Valid   - return value:  0
658  *    Invalid - return value: -1
659  */
660 static int
661 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa)
662 {
663 	int i;
664 	uint64_t pa_offset = 0;
665 	int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1;
666 	int bank = maddr->ma_bank;
667 	mc_addr_t maddr1;
668 	int bank0, bank1;
669 
670 	MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
671 	    maddr->ma_dimm_addr);
672 
673 	/* loc validity check */
674 	ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd);
675 	ASSERT(bank >= 0 && OPL_BANK_MAX > bank);
676 
677 	/* Do translation */
678 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
679 		int pa_bit = 0;
680 		int mc_bit = mcp->mc_trans_table[cs][i];
681 		if (mc_bit < MC_ADDRESS_BITS) {
682 			pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1;
683 		} else if (mc_bit == MP_NONE) {
684 			pa_bit = 0;
685 		} else if (mc_bit == MP_BANK_0) {
686 			pa_bit = bank & 1;
687 		} else if (mc_bit == MP_BANK_1) {
688 			pa_bit = (bank >> 1) & 1;
689 		} else if (mc_bit == MP_BANK_2) {
690 			pa_bit = (bank >> 2) & 1;
691 		}
692 		pa_offset |= ((uint64_t)pa_bit) << i;
693 	}
694 	*pa = mcp->mc_start_address + pa_offset;
695 	MC_LOG("pa = %lx\n", *pa);
696 
697 	if (pa_to_maddr(mcp, *pa, &maddr1) == -1) {
698 		cmn_err(CE_WARN, "mcaddr_to_pa: /LSB%d/B%d/%x failed to "
699 		    "convert PA %lx\n", maddr->ma_bd, bank,
700 		    maddr->ma_dimm_addr, *pa);
701 		return (-1);
702 	}
703 
704 	/*
705 	 * In mirror mode, PA is always translated to the even bank.
706 	 */
707 	if (IS_MIRROR(mcp, maddr->ma_bank)) {
708 		bank0 = maddr->ma_bank & ~(1);
709 		bank1 = maddr1.ma_bank & ~(1);
710 	} else {
711 		bank0 = maddr->ma_bank;
712 		bank1 = maddr1.ma_bank;
713 	}
714 	/*
715 	 * there is no need to check ma_bd because it is generated from
716 	 * mcp.  They are the same.
717 	 */
718 	if ((bank0 == bank1) && (maddr->ma_dimm_addr ==
719 	    maddr1.ma_dimm_addr)) {
720 		return (0);
721 	} else {
722 		MC_LOG("Translation error source /LSB%d/B%d/%x, "
723 		    "PA %lx, target /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
724 		    maddr->ma_dimm_addr, *pa, maddr1.ma_bd, maddr1.ma_bank,
725 		    maddr1.ma_dimm_addr);
726 		return (-1);
727 	}
728 }
729 
730 /*
731  * PA to CS (used by pa_to_maddr).
732  */
733 static int
734 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset)
735 {
736 	int i;
737 	int cs = 1;
738 
739 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
740 		/* MAC address bit<29> is arranged on the same PA bit */
741 		/* on both table. So we may use any table. */
742 		if (mcp->mc_trans_table[0][i] == CS_SHIFT) {
743 			cs = (pa_offset >> i) & 1;
744 			break;
745 		}
746 	}
747 	return (cs);
748 }
749 
750 /*
751  * PA to DIMM (used by pa_to_maddr).
752  */
753 /* ARGSUSED */
754 static uint32_t
755 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset)
756 {
757 	int i;
758 	int cs = pa_to_cs(mcp, pa_offset);
759 	uint32_t dimm_addr = 0;
760 
761 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
762 		int pa_bit_value = (pa_offset >> i) & 1;
763 		int mc_bit = mcp->mc_trans_table[cs][i];
764 		if (mc_bit < MC_ADDRESS_BITS) {
765 			dimm_addr |= pa_bit_value << mc_bit;
766 		}
767 	}
768 	dimm_addr |= cs << CS_SHIFT;
769 	return (dimm_addr);
770 }
771 
772 /*
773  * PA to Bank (used by pa_to_maddr).
774  */
775 static int
776 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset)
777 {
778 	int i;
779 	int cs = pa_to_cs(mcp, pa_offset);
780 	int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT];
781 
782 
783 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
784 		int pa_bit_value = (pa_offset >> i) & 1;
785 		int mc_bit = mcp->mc_trans_table[cs][i];
786 		switch (mc_bit) {
787 		case MP_BANK_0:
788 			bankno |= pa_bit_value;
789 			break;
790 		case MP_BANK_1:
791 			bankno |= pa_bit_value << 1;
792 			break;
793 		case MP_BANK_2:
794 			bankno |= pa_bit_value << 2;
795 			break;
796 		}
797 	}
798 
799 	return (bankno);
800 }
801 
802 /*
803  * PA to MAC address translation
804  *
805  *   Input: MAC driver state, physicall adress
806  *   Output: LSB#, Bank id, mac address
807  *
808  *    Valid   - return value:  0
809  *    Invalid - return value: -1
810  */
811 
812 int
813 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr)
814 {
815 	uint64_t pa_offset;
816 
817 	if (!mc_rangecheck_pa(mcp, pa))
818 		return (-1);
819 
820 	/* Do translation */
821 	pa_offset = pa - mcp->mc_start_address;
822 
823 	maddr->ma_bd = mcp->mc_board_num;
824 	maddr->ma_phys_bd = mcp->mc_phys_board_num;
825 	maddr->ma_bank = pa_to_bank(mcp, pa_offset);
826 	maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset);
827 	MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n", pa_offset, maddr->ma_bd,
828 	    maddr->ma_bank, maddr->ma_dimm_addr);
829 	return (0);
830 }
831 
832 /*
833  * UNUM format for DC is "/CMUnn/MEMxyZ", where
834  *	nn = 00..03 for DC1 and 00..07 for DC2 and 00..15 for DC3.
835  *	x = MAC 0..3
836  *	y = 0..3 (slot info).
837  *	Z = 'A' or 'B'
838  *
839  * UNUM format for FF1 is "/MBU_A/MEMBx/MEMyZ", where
840  *	x = 0..3 (MEMB number)
841  *	y = 0..3 (slot info).
842  *	Z = 'A' or 'B'
843  *
844  * UNUM format for FF2 is "/MBU_B/MEMBx/MEMyZ", where
845  *	x = 0..7 (MEMB number)
846  *	y = 0..3 (slot info).
847  *	Z = 'A' or 'B'
848  *
849  * UNUM format for IKKAKU is "/MBU_A/MEMyZ", where
850  *	y = 0..3 (slot info).
851  *	Z = 'A' or 'B'
852  *
853  */
854 int
855 mc_set_mem_unum(char *buf, int buflen, int sb, int bank,
856     uint32_t mf_type, uint32_t d_slot)
857 {
858 	char *dimmnm;
859 	char memb_num;
860 	int cs;
861 	int i;
862 	int j;
863 
864 	cs = SLOT_TO_CS(d_slot);
865 
866 	switch (plat_model) {
867 	case MODEL_DC:
868 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
869 		    mf_type == FLT_TYPE_PERMANENT_CE) {
870 			i = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
871 			dimmnm = mc_dc_dimm_unum_table[i];
872 			(void) snprintf(buf, buflen, "/%s%02d/MEM%s",
873 			    model_names[plat_model].unit_name, sb, dimmnm);
874 		} else {
875 			i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
876 			j = (cs == 0) ?  i : i + 2;
877 			(void) snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
878 			    model_names[plat_model].unit_name, sb,
879 			    mc_dc_dimm_unum_table[j],
880 			    mc_dc_dimm_unum_table[j + 1]);
881 		}
882 		break;
883 	case MODEL_FF1:
884 	case MODEL_FF2:
885 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
886 		    mf_type == FLT_TYPE_PERMANENT_CE) {
887 			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
888 			dimmnm = mc_ff_dimm_unum_table[i];
889 			memb_num = dimmnm[0];
890 			(void) snprintf(buf, buflen, "/%s/%s%c/MEM%s",
891 			    model_names[plat_model].unit_name,
892 			    model_names[plat_model].mem_name,
893 			    memb_num, &dimmnm[1]);
894 		} else {
895 			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
896 			j = (cs == 0) ?  i : i + 2;
897 			memb_num = mc_ff_dimm_unum_table[i][0],
898 			    (void) snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
899 			    model_names[plat_model].unit_name,
900 			    model_names[plat_model].mem_name, memb_num,
901 			    &mc_ff_dimm_unum_table[j][1],
902 			    &mc_ff_dimm_unum_table[j + 1][1]);
903 		}
904 		break;
905 	case MODEL_IKKAKU:
906 		if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
907 		    mf_type == FLT_TYPE_PERMANENT_CE) {
908 			i = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
909 			dimmnm = mc_ff_dimm_unum_table[i];
910 			(void) snprintf(buf, buflen, "/%s/MEM%s",
911 			    model_names[plat_model].unit_name, &dimmnm[1]);
912 		} else {
913 			i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
914 			j = (cs == 0) ?  i : i + 2;
915 			memb_num = mc_ff_dimm_unum_table[i][0],
916 			    (void) snprintf(buf, buflen, "/%s/MEM%s MEM%s",
917 			    model_names[plat_model].unit_name,
918 			    &mc_ff_dimm_unum_table[j][1],
919 			    &mc_ff_dimm_unum_table[j + 1][1]);
920 		}
921 		break;
922 	default:
923 		return (-1);
924 	}
925 	return (0);
926 }
927 
928 static void
929 mc_ereport_post(mc_aflt_t *mc_aflt)
930 {
931 	char buf[FM_MAX_CLASS];
932 	char device_path[MAXPATHLEN];
933 	char sid[MAXPATHLEN];
934 	nv_alloc_t *nva = NULL;
935 	nvlist_t *ereport, *detector, *resource;
936 	errorq_elem_t *eqep;
937 	int nflts;
938 	mc_flt_stat_t *flt_stat;
939 	int i, n;
940 	int blen = MAXPATHLEN;
941 	char *p, *s = NULL;
942 	uint32_t values[2], synd[2], dslot[2];
943 	uint64_t offset = (uint64_t)-1;
944 	int ret = -1;
945 
946 	if (panicstr) {
947 		eqep = errorq_reserve(ereport_errorq);
948 		if (eqep == NULL)
949 			return;
950 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
951 		nva = errorq_elem_nva(ereport_errorq, eqep);
952 	} else {
953 		ereport = fm_nvlist_create(nva);
954 	}
955 
956 	/*
957 	 * Create the scheme "dev" FMRI.
958 	 */
959 	detector = fm_nvlist_create(nva);
960 	resource = fm_nvlist_create(nva);
961 
962 	nflts = mc_aflt->mflt_nflts;
963 
964 	ASSERT(nflts >= 1 && nflts <= 2);
965 
966 	flt_stat = mc_aflt->mflt_stat[0];
967 	(void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path);
968 	(void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL,
969 	    device_path, NULL, NULL);
970 
971 	/*
972 	 * Encode all the common data into the ereport.
973 	 */
974 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s", MC_OPL_ERROR_CLASS,
975 	    mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS : MC_OPL_MI_SUBCLASS,
976 	    mc_aflt->mflt_erpt_class);
977 
978 	MC_LOG("mc_ereport_post: ereport %s\n", buf);
979 
980 
981 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
982 	    fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1), detector, NULL);
983 
984 	/*
985 	 * Set payload.
986 	 */
987 	fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32,
988 	    flt_stat->mf_flt_maddr.ma_bd, NULL);
989 
990 	fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64,
991 	    flt_stat->mf_flt_paddr, NULL);
992 
993 	if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE ||
994 	    flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
995 		fm_payload_set(ereport, MC_OPL_FLT_TYPE, DATA_TYPE_UINT8,
996 		    ECC_STICKY, NULL);
997 	}
998 
999 	for (i = 0; i < nflts; i++)
1000 		values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank;
1001 
1002 	fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY, nflts,
1003 	    values, NULL);
1004 
1005 	for (i = 0; i < nflts; i++)
1006 		values[i] = mc_aflt->mflt_stat[i]->mf_cntl;
1007 
1008 	fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY, nflts,
1009 	    values, NULL);
1010 
1011 	for (i = 0; i < nflts; i++)
1012 		values[i] = mc_aflt->mflt_stat[i]->mf_err_add;
1013 
1014 	/* offset is set only for PCE and ICE */
1015 	if (mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_INTERMITTENT_CE ||
1016 	    mc_aflt->mflt_stat[0]->mf_type == FLT_TYPE_PERMANENT_CE) {
1017 		offset = values[0];
1018 
1019 	}
1020 	fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY, nflts,
1021 	    values, NULL);
1022 
1023 	for (i = 0; i < nflts; i++)
1024 		values[i] = mc_aflt->mflt_stat[i]->mf_err_log;
1025 
1026 	fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY, nflts,
1027 	    values, NULL);
1028 
1029 	for (i = 0; i < nflts; i++) {
1030 		flt_stat = mc_aflt->mflt_stat[i];
1031 		if (flt_stat->mf_errlog_valid) {
1032 			synd[i] = flt_stat->mf_synd;
1033 			dslot[i] = flt_stat->mf_dimm_slot;
1034 			values[i] = flt_stat->mf_dram_place;
1035 		} else {
1036 			synd[i] = 0;
1037 			dslot[i] = 0;
1038 			values[i] = 0;
1039 		}
1040 	}
1041 
1042 	fm_payload_set(ereport, MC_OPL_ERR_SYND, DATA_TYPE_UINT32_ARRAY, nflts,
1043 	    synd, NULL);
1044 
1045 	fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT, DATA_TYPE_UINT32_ARRAY,
1046 	    nflts, dslot, NULL);
1047 
1048 	fm_payload_set(ereport, MC_OPL_ERR_DRAM, DATA_TYPE_UINT32_ARRAY, nflts,
1049 	    values, NULL);
1050 
1051 	device_path[0] = 0;
1052 	p = &device_path[0];
1053 	sid[0] = 0;
1054 	s = &sid[0];
1055 	ret = 0;
1056 
1057 	for (i = 0; i < nflts; i++) {
1058 		int bank;
1059 
1060 		flt_stat = mc_aflt->mflt_stat[i];
1061 		bank = flt_stat->mf_flt_maddr.ma_bank;
1062 		ret = mc_set_mem_unum(p + strlen(p), blen,
1063 		    flt_stat->mf_flt_maddr.ma_phys_bd, bank, flt_stat->mf_type,
1064 		    flt_stat->mf_dimm_slot);
1065 
1066 		if (ret != 0) {
1067 			cmn_err(CE_WARN,
1068 			    "mc_ereport_post: Failed to determine the unum "
1069 			    "for board=%d bank=%d type=0x%x slot=0x%x",
1070 			    flt_stat->mf_flt_maddr.ma_bd, bank,
1071 			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1072 			continue;
1073 		}
1074 		n = strlen(device_path);
1075 		blen = MAXPATHLEN - n;
1076 		p = &device_path[n];
1077 		if (i < (nflts - 1)) {
1078 			(void) snprintf(p, blen, " ");
1079 			blen--;
1080 			p++;
1081 		}
1082 
1083 		if (ret == 0) {
1084 			ret = mc_set_mem_sid(mc_aflt->mflt_mcp, s + strlen(s),
1085 			    blen, flt_stat->mf_flt_maddr.ma_phys_bd, bank,
1086 			    flt_stat->mf_type, flt_stat->mf_dimm_slot);
1087 
1088 		}
1089 	}
1090 
1091 	(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1092 	    device_path, (ret == 0) ? sid : NULL, (ret == 0) ? offset :
1093 	    (uint64_t)-1);
1094 
1095 	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST, resource,
1096 	    NULL);
1097 
1098 	if (panicstr) {
1099 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1100 	} else {
1101 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1102 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1103 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1104 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1105 	}
1106 }
1107 
1108 
1109 static void
1110 mc_err_drain(mc_aflt_t *mc_aflt)
1111 {
1112 	int rv;
1113 	uint64_t pa = (uint64_t)(-1);
1114 	int i;
1115 
1116 	MC_LOG("mc_err_drain: %s\n", mc_aflt->mflt_erpt_class);
1117 	/*
1118 	 * we come here only when we have:
1119 	 * In mirror mode: MUE, SUE
1120 	 * In normal mode: UE, Permanent CE, Intermittent CE
1121 	 */
1122 	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1123 		rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
1124 		    &(mc_aflt->mflt_stat[i]->mf_flt_maddr), &pa);
1125 
1126 		/* Ensure the pa is valid (not in isolated memory block) */
1127 		if (rv == 0 && pa_is_valid(mc_aflt->mflt_mcp, pa))
1128 			mc_aflt->mflt_stat[i]->mf_flt_paddr = pa;
1129 		else
1130 			mc_aflt->mflt_stat[i]->mf_flt_paddr = (uint64_t)-1;
1131 	}
1132 
1133 	MC_LOG("mc_err_drain:pa = %lx\n", pa);
1134 
1135 	switch (page_retire_check(pa, NULL)) {
1136 	case 0:
1137 	case EAGAIN:
1138 		MC_LOG("Page retired or pending\n");
1139 		return;
1140 	case EIO:
1141 		/*
1142 		 * Do page retirement except for the PCE and ICE cases.
1143 		 * This is taken care by the OPL DE
1144 		 */
1145 		if (mc_aflt->mflt_stat[0]->mf_type !=
1146 		    FLT_TYPE_INTERMITTENT_CE &&
1147 		    mc_aflt->mflt_stat[0]->mf_type != FLT_TYPE_PERMANENT_CE) {
1148 			MC_LOG("offline page at pa %lx error %x\n", pa,
1149 			    mc_aflt->mflt_pr);
1150 			(void) page_retire(pa, mc_aflt->mflt_pr);
1151 		}
1152 		break;
1153 	case EINVAL:
1154 	default:
1155 		/*
1156 		 * Some memory do not have page structure so
1157 		 * we keep going in case of EINVAL.
1158 		 */
1159 		break;
1160 	}
1161 
1162 	for (i = 0; i < mc_aflt->mflt_nflts; i++) {
1163 		mc_aflt_t mc_aflt0;
1164 		if (mc_aflt->mflt_stat[i]->mf_flt_paddr != (uint64_t)-1) {
1165 			mc_aflt0 = *mc_aflt;
1166 			mc_aflt0.mflt_nflts = 1;
1167 			mc_aflt0.mflt_stat[0] = mc_aflt->mflt_stat[i];
1168 			mc_ereport_post(&mc_aflt0);
1169 		}
1170 	}
1171 }
1172 
1173 /*
1174  * The restart address is actually defined in unit of PA[37:6]
1175  * the mac patrol will convert that to dimm offset.  If the
1176  * address is not in the bank, it will continue to search for
1177  * the next PA that is within the bank.
1178  *
1179  * Also the mac patrol scans the dimms based on PA, not
1180  * dimm offset.
1181  */
1182 static int
1183 restart_patrol(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr_info)
1184 {
1185 	uint64_t pa;
1186 	int rv;
1187 
1188 	if (MC_REWRITE_MODE(mcp, bank)) {
1189 		return (0);
1190 	}
1191 	if (rsaddr_info == NULL || (rsaddr_info->mi_valid == 0)) {
1192 		MAC_PTRL_START(mcp, bank);
1193 		return (0);
1194 	}
1195 
1196 	rv = mcaddr_to_pa(mcp, &rsaddr_info->mi_restartaddr, &pa);
1197 	if (rv != 0) {
1198 		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
1199 		MAC_PTRL_START(mcp, bank);
1200 		return (0);
1201 	}
1202 
1203 	if (!mc_rangecheck_pa(mcp, pa)) {
1204 		/* pa is not on this board, just retry */
1205 		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
1206 		    "on board %d\n", pa, mcp->mc_board_num);
1207 		MAC_PTRL_START(mcp, bank);
1208 		return (0);
1209 	}
1210 
1211 	MC_LOG("restart_patrol: pa = %lx\n", pa);
1212 
1213 	if (!rsaddr_info->mi_injectrestart) {
1214 		/*
1215 		 * For non-error injection restart we need to
1216 		 * determine if the current restart pa/page is
1217 		 * a "good" page. A "good" page is a page that
1218 		 * has not been page retired. If the current
1219 		 * page that contains the pa is "good", we will
1220 		 * do a HW auto restart and let HW patrol continue
1221 		 * where it last stopped. Most desired scenario.
1222 		 *
1223 		 * If the current page is not "good", we will advance
1224 		 * to the next page to find the next "good" page and
1225 		 * restart the patrol from there.
1226 		 */
1227 		int wrapcount = 0;
1228 		uint64_t origpa = pa;
1229 		while (wrapcount < 2) {
1230 			if (!pa_is_valid(mcp, pa)) {
1231 			/*
1232 			 * Not in physinstall - advance to the
1233 			 * next memory isolation blocksize
1234 			 */
1235 			MC_LOG("Invalid PA\n");
1236 			pa = roundup(pa + 1, mc_isolation_bsize);
1237 			} else {
1238 			int rv;
1239 			if ((rv = page_retire_check(pa, NULL)) != 0 &&
1240 			    rv != EAGAIN) {
1241 					/*
1242 					 * The page is "good" (not retired),
1243 					 * we will use automatic HW restart
1244 					 * algorithm if this is the original
1245 					 * current starting page.
1246 					 */
1247 				if (pa == origpa) {
1248 					MC_LOG("Page has no error. "
1249 					    "Auto restart\n");
1250 					MAC_PTRL_START(mcp, bank);
1251 					return (0);
1252 				} else {
1253 					/*
1254 					 * found a subsequent good page
1255 					 */
1256 					break;
1257 				}
1258 			}
1259 
1260 			/*
1261 			 * Skip to the next page
1262 			 */
1263 			pa = roundup(pa + 1, PAGESIZE);
1264 			MC_LOG("Skipping bad page to %lx\n", pa);
1265 			}
1266 
1267 		    /* Check to see if we hit the end of the memory range */
1268 			if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
1269 			MC_LOG("Wrap around\n");
1270 			pa = mcp->mc_start_address;
1271 			wrapcount++;
1272 			}
1273 		}
1274 
1275 		if (wrapcount > 1) {
1276 			MC_LOG("Failed to find a good page. Just restart\n");
1277 			MAC_PTRL_START(mcp, bank);
1278 			return (0);
1279 		}
1280 	}
1281 
1282 	/*
1283 	 * We reached here either:
1284 	 * 1. We are doing an error injection restart that specify
1285 	 *    the exact pa/page to restart. OR
1286 	 * 2. We found a subsequent good page different from the
1287 	 *    original restart pa/page.
1288 	 * Restart MAC patrol: PA[37:6]
1289 	 */
1290 	MC_LOG("restart at pa = %lx\n", pa);
1291 	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
1292 	MAC_PTRL_START_ADD(mcp, bank);
1293 
1294 	return (0);
1295 }
1296 
1297 static void
1298 mc_retry_info_put(mc_retry_info_t **q, mc_retry_info_t *p)
1299 {
1300 	ASSERT(p != NULL);
1301 	p->ri_next = *q;
1302 	*q = p;
1303 }
1304 
1305 static mc_retry_info_t *
1306 mc_retry_info_get(mc_retry_info_t **q)
1307 {
1308 	mc_retry_info_t *p;
1309 
1310 	if ((p = *q) != NULL) {
1311 		*q = p->ri_next;
1312 		return (p);
1313 	} else {
1314 		return (NULL);
1315 	}
1316 }
1317 
1318 /*
1319  * Rewriting is used for two purposes.
1320  *  - to correct the error in memory.
1321  *  - to determine whether the error is permanent or intermittent.
1322  * It's done by writing the address in MAC_BANKm_REWRITE_ADD
1323  * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that,
1324  * REW_END (and REW_CE/REW_UE if some error detected) is set when
1325  * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM.
1326  *
1327  * Note that rewrite operation doesn't change RAW_UE to Marked UE.
1328  * Therefore, we use it only CE case.
1329  */
1330 
1331 static uint32_t
1332 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr, int retrying)
1333 {
1334 	uint32_t cntl;
1335 	int count = 0;
1336 	int max_count;
1337 	int retry_state;
1338 
1339 	if (retrying)
1340 		max_count = 1;
1341 	else
1342 		max_count = mc_max_rewrite_loop;
1343 
1344 	retry_state = RETRY_STATE_PENDING;
1345 
1346 	if (!retrying && MC_REWRITE_MODE(mcp, bank)) {
1347 		goto timeout;
1348 	}
1349 
1350 	retry_state = RETRY_STATE_ACTIVE;
1351 
1352 	/* first wait to make sure PTRL_STATUS is 0 */
1353 	while (count++ < max_count) {
1354 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1355 		if (!(cntl & MAC_CNTL_PTRL_STATUS)) {
1356 			count = 0;
1357 			break;
1358 		}
1359 		drv_usecwait(mc_rewrite_delay);
1360 	}
1361 	if (count >= max_count)
1362 		goto timeout;
1363 
1364 	count = 0;
1365 
1366 	ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr);
1367 	MAC_REW_REQ(mcp, bank);
1368 
1369 	retry_state = RETRY_STATE_REWRITE;
1370 
1371 	do {
1372 		if (count++ > max_count) {
1373 			goto timeout;
1374 		} else {
1375 			drv_usecwait(mc_rewrite_delay);
1376 		}
1377 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1378 	/*
1379 	 * If there are other MEMORY or PCI activities, this
1380 	 * will be BUSY, else it should be set immediately
1381 	 */
1382 	} while (!(cntl & MAC_CNTL_REW_END));
1383 
1384 	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
1385 	return (cntl);
1386 timeout:
1387 	mc_set_rewrite(mcp, bank, dimm_addr, retry_state);
1388 
1389 	return (0);
1390 }
1391 
1392 void
1393 mc_clear_rewrite(mc_opl_t *mcp, int bank)
1394 {
1395 	struct mc_bank *bankp;
1396 	mc_retry_info_t *retry;
1397 	uint32_t rew_addr;
1398 
1399 	bankp = &(mcp->mc_bank[bank]);
1400 	retry = bankp->mcb_active;
1401 	bankp->mcb_active = NULL;
1402 	mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1403 
1404 again:
1405 	bankp->mcb_rewrite_count = 0;
1406 
1407 	while (retry = mc_retry_info_get(&bankp->mcb_retry_pending)) {
1408 		rew_addr = retry->ri_addr;
1409 		mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
1410 		if (do_rewrite(mcp, bank, rew_addr, 1) == 0)
1411 			break;
1412 	}
1413 
1414 	/* we break out if no more pending rewrite or we got timeout again */
1415 
1416 	if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1417 		if (!IS_MIRROR(mcp, bank)) {
1418 			MC_CLEAR_REWRITE_MODE(mcp, bank);
1419 		} else {
1420 			int mbank = bank ^ 1;
1421 			bankp = &(mcp->mc_bank[mbank]);
1422 			if (!bankp->mcb_active && !bankp->mcb_retry_pending) {
1423 			MC_CLEAR_REWRITE_MODE(mcp, bank);
1424 			MC_CLEAR_REWRITE_MODE(mcp, mbank);
1425 			} else {
1426 			bank = mbank;
1427 			goto again;
1428 			}
1429 		}
1430 	}
1431 }
1432 
1433 void
1434 mc_set_rewrite(mc_opl_t *mcp, int bank, uint32_t addr, int state)
1435 {
1436 	mc_retry_info_t *retry;
1437 	struct mc_bank *bankp;
1438 
1439 	bankp = &mcp->mc_bank[bank];
1440 
1441 	retry = mc_retry_info_get(&bankp->mcb_retry_freelist);
1442 
1443 	if (retry == NULL) {
1444 		mc_addr_t maddr;
1445 		uint64_t paddr;
1446 		/*
1447 		 * previous rewrite request has not completed yet.
1448 		 * So we discard this rewrite request.
1449 		 */
1450 		maddr.ma_bd = mcp->mc_board_num;
1451 		maddr.ma_bank =  bank;
1452 		maddr.ma_dimm_addr = addr;
1453 		if (mcaddr_to_pa(mcp, &maddr, &paddr) == 0) {
1454 			cmn_err(CE_WARN, "Discard CE rewrite request"
1455 			    " for 0x%lx (/LSB%d/B%d/%x).\n",
1456 			    paddr, mcp->mc_board_num, bank, addr);
1457 		} else {
1458 			cmn_err(CE_WARN, "Discard CE rewrite request"
1459 			    " for /LSB%d/B%d/%x.\n",
1460 			    mcp->mc_board_num, bank, addr);
1461 		}
1462 		return;
1463 	}
1464 
1465 	retry->ri_addr = addr;
1466 	retry->ri_state = state;
1467 
1468 	MC_SET_REWRITE_MODE(mcp, bank);
1469 
1470 	if ((state > RETRY_STATE_PENDING)) {
1471 		ASSERT(bankp->mcb_active == NULL);
1472 		bankp->mcb_active = retry;
1473 	} else {
1474 		mc_retry_info_put(&bankp->mcb_retry_pending, retry);
1475 	}
1476 
1477 	if (IS_MIRROR(mcp, bank)) {
1478 		int mbank = bank ^1;
1479 		MC_SET_REWRITE_MODE(mcp, mbank);
1480 	}
1481 }
1482 
1483 void
1484 mc_process_scf_log(mc_opl_t *mcp)
1485 {
1486 	int count;
1487 	int n = 0;
1488 	scf_log_t *p;
1489 	int bank;
1490 
1491 	for (bank = 0; bank < BANKNUM_PER_SB; bank++) {
1492 		while ((p = mcp->mc_scf_log[bank]) != NULL &&
1493 		    (n < mc_max_errlog_processed)) {
1494 		ASSERT(bank == p->sl_bank);
1495 		count = 0;
1496 		while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank))
1497 		    & MAC_STATIC_ERR_VLD)) {
1498 			if (count++ >= (mc_max_scf_loop)) {
1499 				break;
1500 			}
1501 			drv_usecwait(mc_scf_delay);
1502 		}
1503 
1504 		if (count < mc_max_scf_loop) {
1505 			ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank),
1506 			    p->sl_err_log);
1507 
1508 			ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank),
1509 			    p->sl_err_add|MAC_STATIC_ERR_VLD);
1510 			mcp->mc_scf_retry[bank] = 0;
1511 		} else {
1512 			/*
1513 			 * if we try too many times, just drop the req
1514 			 */
1515 			if (mcp->mc_scf_retry[bank]++ <=
1516 			    mc_max_scf_retry) {
1517 				return;
1518 			} else {
1519 				if ((++mc_pce_dropped & 0xff) == 0) {
1520 					cmn_err(CE_WARN, "Cannot "
1521 					    "report CE to SCF\n");
1522 				}
1523 			}
1524 		}
1525 		n++;
1526 		mcp->mc_scf_log[bank] = p->sl_next;
1527 		mcp->mc_scf_total[bank]--;
1528 		ASSERT(mcp->mc_scf_total[bank] >= 0);
1529 		kmem_free(p, sizeof (scf_log_t));
1530 		}
1531 	}
1532 }
1533 void
1534 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank)
1535 {
1536 	scf_log_t *p;
1537 
1538 	if (mcp->mc_scf_total[bank] >= mc_max_scf_logs) {
1539 		if ((++mc_pce_dropped & 0xff) == 0) {
1540 			cmn_err(CE_WARN, "Too many CE requests.\n");
1541 		}
1542 		return;
1543 	}
1544 	p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP);
1545 	p->sl_next = 0;
1546 	p->sl_err_add = flt_stat->mf_err_add;
1547 	p->sl_err_log = flt_stat->mf_err_log;
1548 	p->sl_bank = bank;
1549 
1550 	if (mcp->mc_scf_log[bank] == NULL) {
1551 		/*
1552 		 * we rely on mc_scf_log to detect NULL queue.
1553 		 * mc_scf_log_tail is irrelevant is such case.
1554 		 */
1555 		mcp->mc_scf_log_tail[bank] = mcp->mc_scf_log[bank] = p;
1556 	} else {
1557 		mcp->mc_scf_log_tail[bank]->sl_next = p;
1558 		mcp->mc_scf_log_tail[bank] = p;
1559 	}
1560 	mcp->mc_scf_total[bank]++;
1561 }
1562 /*
1563  * This routine determines what kind of CE happens, intermittent
1564  * or permanent as follows. (See 4.7.3 in Columbus2 PRM.)
1565  * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register.
1566  * - If CE is still detected on the same address even after doing
1567  *   rewrite operation twice, it is determined as permanent error.
1568  * - If error is not detected anymore, it is determined as intermittent
1569  *   error.
1570  * - If UE is detected due to rewrite operation, it should be treated
1571  *   as UE.
1572  */
1573 
1574 /* ARGSUSED */
1575 static void
1576 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error)
1577 {
1578 	uint32_t cntl;
1579 	int i;
1580 
1581 	flt_stat->mf_type = FLT_TYPE_PERMANENT_CE;
1582 	/*
1583 	 * rewrite request 1st time reads and correct error data
1584 	 * and write to DIMM.  2nd rewrite request must be issued
1585 	 * after REW_CE/UE/END is 0.  When the 2nd request is completed,
1586 	 * if REW_CE = 1, then it is permanent CE.
1587 	 */
1588 	for (i = 0; i < 2; i++) {
1589 		cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add, 0);
1590 
1591 		if (cntl == 0) {
1592 			/* timeout case */
1593 			return;
1594 		}
1595 		/*
1596 		 * If the error becomes UE or CMPE
1597 		 * we return to the caller immediately.
1598 		 */
1599 		if (cntl & MAC_CNTL_REW_UE) {
1600 			if (ptrl_error)
1601 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE;
1602 			else
1603 				flt_stat->mf_cntl |= MAC_CNTL_MI_UE;
1604 			flt_stat->mf_type = FLT_TYPE_UE;
1605 			return;
1606 		}
1607 		if (cntl & MAC_CNTL_REW_CMPE) {
1608 			if (ptrl_error)
1609 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE;
1610 			else
1611 				flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE;
1612 			flt_stat->mf_type = FLT_TYPE_CMPE;
1613 			return;
1614 		}
1615 	}
1616 	if (!(cntl & MAC_CNTL_REW_CE)) {
1617 		flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE;
1618 	}
1619 
1620 	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1621 		/* report PERMANENT_CE to SP via SCF */
1622 		if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) {
1623 			mc_queue_scf_log(mcp, flt_stat, bank);
1624 		}
1625 	}
1626 }
1627 
/*
 * Predicates on a PTRL_CNTL status value.  A non-zero `f' selects the
 * patrol (PTRL) bits, zero selects the MI bits.
 */
#define	IS_CMPE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE : MAC_CNTL_MI_CMPE))
#define	IS_UE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE))
#define	IS_CE(cntl, f)	\
	((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE))
#define	IS_OK(cntl, f)	\
	(!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : MAC_CNTL_MI_ERRS)))
1634 
1635 
1636 static int
1637 IS_CE_ONLY(uint32_t cntl, int ptrl_error)
1638 {
1639 	if (ptrl_error) {
1640 		return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE);
1641 	} else {
1642 		return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE);
1643 	}
1644 }
1645 
1646 void
1647 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value)
1648 {
1649 	int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
1650 
1651 	if (mcp->mc_speedup_period[ebank] > 0)
1652 		value |= mc_max_speed;
1653 	else
1654 		value |= mcp->mc_speed;
1655 	ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value);
1656 }
1657 
1658 static void
1659 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1660 {
1661 	flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1662 	    MAC_CNTL_PTRL_ERRS;
1663 	flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank));
1664 	flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank));
1665 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1666 	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1667 	flt_stat->mf_flt_maddr.ma_bank = bank;
1668 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1669 }
1670 
1671 static void
1672 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1673 {
1674 	uint32_t status, old_status;
1675 
1676 	status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) & MAC_CNTL_MI_ERRS;
1677 	old_status = 0;
1678 
1679 	/* we keep reading until the status is stable */
1680 	while (old_status != status) {
1681 		old_status = status;
1682 		flt_stat->mf_err_add = LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank));
1683 		flt_stat->mf_err_log = LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank));
1684 		status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1685 		    MAC_CNTL_MI_ERRS;
1686 		if (status == old_status) {
1687 			break;
1688 		}
1689 	}
1690 
1691 	flt_stat->mf_cntl = status;
1692 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1693 	flt_stat->mf_flt_maddr.ma_phys_bd = mcp->mc_phys_board_num;
1694 	flt_stat->mf_flt_maddr.ma_bank = bank;
1695 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1696 }
1697 
1698 
1699 /*
1700  * Error philosophy for mirror mode:
1701  *
1702  * PTRL (The error address for both banks are same, since ptrl stops if it
1703  * detects error.)
1704  * - Compare error  log CMPE.
1705  *
1706  * - UE-UE           Report MUE.  No rewrite.
1707  *
1708  * - UE-*	     UE-(CE/OK). Rewrite to scrub UE.  Report SUE.
1709  *
1710  * - CE-*            CE-(CE/OK). Scrub to determine if CE is permanent.
1711  *                   If CE is permanent, inform SCF.  Once for each
1712  *		     Dimm.  If CE becomes UE or CMPE, go back to above.
1713  *
1714  *
1715  * MI (The error addresses for each bank are the same or different.)
1716  * - Compare  error  If addresses are the same.  Just CMPE, so log CMPE.
1717  *		     If addresses are different (this could happen
1718  *		     as a result of scrubbing.  Report each separately.
1719  *		     Only report error info on each side.
1720  *
1721  * - UE-UE           Addresses are the same.  Report MUE.
1722  *		     Addresses are different.  Report SUE on each bank.
1723  *		     Rewrite to clear UE.
1724  *
1725  * - UE-*	     UE-(CE/OK)
1726  *		     Rewrite to clear UE.  Report SUE for the bank.
1727  *
1728  * - CE-*            CE-(CE/OK).  Scrub to determine if CE is permanent.
1729  *                   If CE becomes UE or CMPE, go back to above.
1730  *
1731  */
1732 
1733 static int
1734 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat)
1735 {
1736 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1737 	int i;
1738 	int rv = 0;
1739 	int bank;
1740 	int rewrite_timeout = 0;
1741 
1742 	MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n",
1743 	    flt_stat[0].mf_cntl, flt_stat[1].mf_cntl);
1744 
1745 	if (ptrl_error) {
1746 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1747 		    MAC_CNTL_PTRL_ERRS) == 0)
1748 			return (0);
1749 	} else {
1750 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl) &
1751 		    MAC_CNTL_MI_ERRS) == 0)
1752 			return (0);
1753 	}
1754 
1755 	/*
1756 	 * First we take care of the case of CE
1757 	 * because they can become UE or CMPE
1758 	 */
1759 	for (i = 0; i < 2; i++) {
1760 		if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) {
1761 			bank = flt_stat[i].mf_flt_maddr.ma_bank;
1762 			MC_LOG("CE detected on bank %d\n", bank);
1763 			mc_scrub_ce(mcp, bank, &flt_stat[i], ptrl_error);
1764 			if (MC_REWRITE_ACTIVE(mcp, bank)) {
1765 				rewrite_timeout = 1;
1766 			}
1767 			rv = 1;
1768 		}
1769 	}
1770 
1771 	if (rewrite_timeout)
1772 		return (0);
1773 
1774 	/* The above scrubbing can turn CE into UE or CMPE */
1775 
1776 	/*
1777 	 * Now we distinguish two cases: same address or not
1778 	 * the same address.  It might seem more intuitive to
1779 	 * distinguish PTRL v.s. MI error but it is more
1780 	 * complicated that way.
1781 	 */
1782 
1783 	if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) {
1784 
1785 		if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) ||
1786 		    IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) {
1787 			flt_stat[0].mf_type = FLT_TYPE_CMPE;
1788 			flt_stat[1].mf_type = FLT_TYPE_CMPE;
1789 			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1790 			mc_aflt->mflt_nflts = 2;
1791 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1792 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1793 			mc_aflt->mflt_pr = PR_UE;
1794 			/*
1795 			 * Compare error is result of MAC internal error, so
1796 			 * simply log it instead of publishing an ereport. SCF
1797 			 * diagnoses all the MAC internal and its i/f error.
1798 			 */
1799 			MC_LOG("cmpe error detected\n");
1800 			return (1);
1801 		}
1802 
1803 		if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) &&
1804 		    IS_UE(flt_stat[1].mf_cntl, ptrl_error)) {
1805 			/* Both side are UE's */
1806 
1807 			MAC_SET_ERRLOG_INFO(&flt_stat[0]);
1808 			MAC_SET_ERRLOG_INFO(&flt_stat[1]);
1809 			MC_LOG("MUE detected\n");
1810 			flt_stat[0].mf_type = FLT_TYPE_MUE;
1811 			flt_stat[1].mf_type = FLT_TYPE_MUE;
1812 			mc_aflt->mflt_erpt_class = MC_OPL_MUE;
1813 			mc_aflt->mflt_nflts = 2;
1814 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1815 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1816 			mc_aflt->mflt_pr = PR_UE;
1817 			mc_err_drain(mc_aflt);
1818 			return (1);
1819 		}
1820 
1821 		/* Now the only case is UE/CE, UE/OK, or don't care */
1822 		for (i = 0; i < 2; i++) {
1823 			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1824 
1825 			/* rewrite can clear the one side UE error */
1826 
1827 			if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) {
1828 				(void) do_rewrite(mcp,
1829 				    flt_stat[i].mf_flt_maddr.ma_bank,
1830 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr, 0);
1831 			}
1832 			flt_stat[i].mf_type = FLT_TYPE_UE;
1833 			MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1834 			mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1835 			mc_aflt->mflt_stat[0] = &flt_stat[i];
1836 			mc_aflt->mflt_nflts = 1;
1837 			mc_aflt->mflt_pr = PR_MCE;
1838 			mc_err_drain(mc_aflt);
1839 			/* Once we hit a UE/CE or UE/OK case, done */
1840 			return (1);
1841 			}
1842 		}
1843 
1844 	} else {
1845 		/*
1846 		 * addresses are different. That means errors
1847 		 * on the 2 banks are not related at all.
1848 		 */
1849 		for (i = 0; i < 2; i++) {
1850 			if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) {
1851 				flt_stat[i].mf_type = FLT_TYPE_CMPE;
1852 				mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1853 				mc_aflt->mflt_nflts = 1;
1854 				mc_aflt->mflt_stat[0] = &flt_stat[i];
1855 				mc_aflt->mflt_pr = PR_UE;
1856 				/*
1857 				 * Compare error is result of MAC internal
1858 				 * error, so simply log it instead of
1859 				 * publishing an ereport. SCF diagnoses all
1860 				 * the MAC internal and its interface error.
1861 				 */
1862 				MC_LOG("cmpe error detected\n");
1863 				/* no more report on this bank */
1864 				flt_stat[i].mf_cntl = 0;
1865 				rv = 1;
1866 			}
1867 		}
1868 
1869 		/* rewrite can clear the one side UE error */
1870 
1871 		for (i = 0; i < 2; i++) {
1872 			if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1873 				(void) do_rewrite(mcp,
1874 				    flt_stat[i].mf_flt_maddr.ma_bank,
1875 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr,
1876 				    0);
1877 				flt_stat[i].mf_type = FLT_TYPE_UE;
1878 				MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1879 				mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1880 				mc_aflt->mflt_stat[0] = &flt_stat[i];
1881 				mc_aflt->mflt_nflts = 1;
1882 				mc_aflt->mflt_pr = PR_MCE;
1883 				mc_err_drain(mc_aflt);
1884 				rv = 1;
1885 			}
1886 		}
1887 	}
1888 	return (rv);
1889 }
1890 static void
1891 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
1892 {
1893 	mc_aflt_t mc_aflt;
1894 	mc_flt_stat_t flt_stat[2], mi_flt_stat[2];
1895 	int i;
1896 	int mi_valid;
1897 
1898 	ASSERT(rsaddr);
1899 
1900 	bzero(&mc_aflt, sizeof (mc_aflt_t));
1901 	bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t));
1902 	bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t));
1903 
1904 
1905 	mc_aflt.mflt_mcp = mcp;
1906 	mc_aflt.mflt_id = gethrtime();
1907 
1908 	/* Now read all the registers into flt_stat */
1909 
1910 	for (i = 0; i < 2; i++) {
1911 		MC_LOG("Reading registers of bank %d\n", bank);
1912 		/* patrol registers */
1913 		mc_read_ptrl_reg(mcp, bank, &flt_stat[i]);
1914 
1915 		/*
1916 		 * In mirror mode, it is possible that only one bank
1917 		 * may report the error. We need to check for it to
1918 		 * ensure we pick the right addr value for patrol restart.
1919 		 * Note that if both banks reported errors, we pick the
1920 		 * 2nd one. Both banks should reported the same error address.
1921 		 */
1922 		if (flt_stat[i].mf_cntl & MAC_CNTL_PTRL_ERRS)
1923 			rsaddr->mi_restartaddr = flt_stat[i].mf_flt_maddr;
1924 
1925 		MC_LOG("ptrl registers cntl %x add %x log %x\n",
1926 		    flt_stat[i].mf_cntl, flt_stat[i].mf_err_add,
1927 		    flt_stat[i].mf_err_log);
1928 
1929 		/* MI registers */
1930 		mc_read_mi_reg(mcp, bank, &mi_flt_stat[i]);
1931 
1932 		MC_LOG("MI registers cntl %x add %x log %x\n",
1933 		    mi_flt_stat[i].mf_cntl, mi_flt_stat[i].mf_err_add,
1934 		    mi_flt_stat[i].mf_err_log);
1935 
1936 		bank = bank^1;
1937 	}
1938 
1939 	/* clear errors once we read all the registers */
1940 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1941 
1942 	MAC_CLEAR_ERRS(mcp, bank ^ 1, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1943 
1944 	/* Process MI errors first */
1945 
1946 	/* if not error mode, cntl1 is 0 */
1947 	if ((mi_flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1948 	    (mi_flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1949 		mi_flt_stat[0].mf_cntl = 0;
1950 
1951 	if ((mi_flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1952 	    (mi_flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1953 		mi_flt_stat[1].mf_cntl = 0;
1954 
1955 	mc_aflt.mflt_is_ptrl = 0;
1956 	mi_valid = mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]);
1957 
1958 	if ((((flt_stat[0].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1959 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[0].mf_cntl &
1960 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1961 	    (flt_stat[0].mf_err_add ==
1962 	    ROUNDDOWN(mi_flt_stat[0].mf_err_add, MC_BOUND_BYTE)) &&
1963 	    (((flt_stat[1].mf_cntl & MAC_CNTL_PTRL_ERRS) >>
1964 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat[1].mf_cntl &
1965 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
1966 	    (flt_stat[1].mf_err_add ==
1967 	    ROUNDDOWN(mi_flt_stat[1].mf_err_add, MC_BOUND_BYTE))) {
1968 #ifdef DEBUG
1969 		MC_LOG("discarding PTRL error because "
1970 		    "it is the same as MI\n");
1971 #endif
1972 		rsaddr->mi_valid = mi_valid;
1973 		return;
1974 	}
1975 	/* if not error mode, cntl1 is 0 */
1976 	if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1977 	    (flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1978 		flt_stat[0].mf_cntl = 0;
1979 
1980 	if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1981 	    (flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1982 		flt_stat[1].mf_cntl = 0;
1983 
1984 	mc_aflt.mflt_is_ptrl = 1;
1985 	rsaddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]);
1986 }
1987 static int
1988 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt,
1989 	mc_flt_stat_t *flt_stat)
1990 {
1991 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1992 	int rv = 0;
1993 
1994 	mc_aflt->mflt_erpt_class = NULL;
1995 	if (IS_UE(flt_stat->mf_cntl, ptrl_error)) {
1996 		MC_LOG("UE detected\n");
1997 		flt_stat->mf_type = FLT_TYPE_UE;
1998 		mc_aflt->mflt_erpt_class = MC_OPL_UE;
1999 		mc_aflt->mflt_pr = PR_UE;
2000 		MAC_SET_ERRLOG_INFO(flt_stat);
2001 		rv = 1;
2002 	} else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) {
2003 		MC_LOG("CE detected\n");
2004 		MAC_SET_ERRLOG_INFO(flt_stat);
2005 
2006 		/* Error type can change after scrubbing */
2007 		mc_scrub_ce(mcp, bank, flt_stat, ptrl_error);
2008 		if (MC_REWRITE_ACTIVE(mcp, bank)) {
2009 			return (0);
2010 		}
2011 
2012 		if (flt_stat->mf_type == FLT_TYPE_INTERMITTENT_CE) {
2013 			mc_aflt->mflt_erpt_class = MC_OPL_ICE;
2014 			mc_aflt->mflt_pr = PR_MCE;
2015 		} else if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
2016 			mc_aflt->mflt_erpt_class = MC_OPL_CE;
2017 			mc_aflt->mflt_pr = PR_MCE;
2018 		} else if (flt_stat->mf_type == FLT_TYPE_UE) {
2019 			mc_aflt->mflt_erpt_class = MC_OPL_UE;
2020 			mc_aflt->mflt_pr = PR_UE;
2021 		}
2022 		rv = 1;
2023 	}
2024 	MC_LOG("mc_process_error: fault type %x erpt %s\n", flt_stat->mf_type,
2025 	    mc_aflt->mflt_erpt_class);
2026 	if (mc_aflt->mflt_erpt_class) {
2027 		mc_aflt->mflt_stat[0] = flt_stat;
2028 		mc_aflt->mflt_nflts = 1;
2029 		mc_err_drain(mc_aflt);
2030 	}
2031 	return (rv);
2032 }
2033 
2034 static void
2035 mc_error_handler(mc_opl_t *mcp, int bank, mc_rsaddr_info_t *rsaddr)
2036 {
2037 	mc_aflt_t mc_aflt;
2038 	mc_flt_stat_t flt_stat, mi_flt_stat;
2039 	int mi_valid;
2040 
2041 	bzero(&mc_aflt, sizeof (mc_aflt_t));
2042 	bzero(&flt_stat, sizeof (mc_flt_stat_t));
2043 	bzero(&mi_flt_stat, sizeof (mc_flt_stat_t));
2044 
2045 	mc_aflt.mflt_mcp = mcp;
2046 	mc_aflt.mflt_id = gethrtime();
2047 
2048 	/* patrol registers */
2049 	mc_read_ptrl_reg(mcp, bank, &flt_stat);
2050 
2051 	ASSERT(rsaddr);
2052 	rsaddr->mi_restartaddr = flt_stat.mf_flt_maddr;
2053 
2054 	MC_LOG("ptrl registers cntl %x add %x log %x\n", flt_stat.mf_cntl,
2055 	    flt_stat.mf_err_add, flt_stat.mf_err_log);
2056 
2057 	/* MI registers */
2058 	mc_read_mi_reg(mcp, bank, &mi_flt_stat);
2059 
2060 
2061 	MC_LOG("MI registers cntl %x add %x log %x\n", mi_flt_stat.mf_cntl,
2062 	    mi_flt_stat.mf_err_add, mi_flt_stat.mf_err_log);
2063 
2064 	/* clear errors once we read all the registers */
2065 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
2066 
2067 	mc_aflt.mflt_is_ptrl = 0;
2068 	if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) &&
2069 	    ((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2070 	    ((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2071 		mi_valid = mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat);
2072 	}
2073 
2074 	if ((((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) >>
2075 	    MAC_CNTL_PTRL_ERR_SHIFT) == ((mi_flt_stat.mf_cntl &
2076 	    MAC_CNTL_MI_ERRS) >> MAC_CNTL_MI_ERR_SHIFT)) &&
2077 	    (flt_stat.mf_err_add ==
2078 	    ROUNDDOWN(mi_flt_stat.mf_err_add, MC_BOUND_BYTE))) {
2079 #ifdef DEBUG
2080 		MC_LOG("discarding PTRL error because "
2081 		    "it is the same as MI\n");
2082 #endif
2083 		rsaddr->mi_valid = mi_valid;
2084 		return;
2085 	}
2086 
2087 	mc_aflt.mflt_is_ptrl = 1;
2088 	if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) &&
2089 	    ((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
2090 	    ((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
2091 		rsaddr->mi_valid = mc_process_error(mcp, bank, &mc_aflt,
2092 		    &flt_stat);
2093 	}
2094 }
2095 /*
2096  *	memory patrol error handling algorithm:
2097  *	timeout() is used to do periodic polling
2098  *	This is the flow chart.
2099  *	timeout ->
2100  *	mc_check_errors()
2101  *	    if memory bank is installed, read the status register
2102  *	    if any error bit is set,
2103  *	    -> mc_error_handler()
2104  *		-> read all error registers
2105  *	        -> mc_process_error()
2106  *	            determine error type
2107  *	            rewrite to clear error or scrub to determine CE type
2108  *	            inform SCF on permanent CE
2109  *	        -> mc_err_drain
2110  *	            page offline processing
2111  *	            -> mc_ereport_post()
2112  */
2113 
2114 static void
2115 mc_process_rewrite(mc_opl_t *mcp, int bank)
2116 {
2117 	uint32_t rew_addr, cntl;
2118 	mc_retry_info_t *retry;
2119 	struct mc_bank *bankp;
2120 
2121 	bankp = &(mcp->mc_bank[bank]);
2122 	retry = bankp->mcb_active;
2123 	if (retry == NULL)
2124 		return;
2125 
2126 	if (retry->ri_state <= RETRY_STATE_ACTIVE) {
2127 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
2128 		if (cntl & MAC_CNTL_PTRL_STATUS)
2129 			return;
2130 		rew_addr = retry->ri_addr;
2131 		ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), rew_addr);
2132 		MAC_REW_REQ(mcp, bank);
2133 
2134 		retry->ri_state = RETRY_STATE_REWRITE;
2135 	}
2136 
2137 	cntl = ldphysio(MAC_PTRL_CNTL(mcp, bank));
2138 
2139 	if (cntl & MAC_CNTL_REW_END) {
2140 		MAC_CLEAR_ERRS(mcp, bank,
2141 		    MAC_CNTL_REW_ERRS);
2142 		mc_clear_rewrite(mcp, bank);
2143 	} else {
2144 		/*
2145 		 * If the rewrite does not complete in
2146 		 * 1 hour, we have to consider this a HW
2147 		 * failure.  However, there is no recovery
2148 		 * mechanism.  The only thing we can do
2149 		 * to to print a warning message to the
2150 		 * console.  We continue to increment the
2151 		 * counter but we only print the message
2152 		 * once.  It will take the counter a long
2153 		 * time to wrap around and the user might
2154 		 * see a second message.  In practice,
2155 		 * we have never hit this condition but
2156 		 * we have to keep the code here just in case.
2157 		 */
2158 		if (++mcp->mc_bank[bank].mcb_rewrite_count
2159 		    == mc_max_rewrite_retry) {
2160 			cmn_err(CE_WARN, "Memory patrol feature is"
2161 			" partly suspended on /LSB%d/B%d"
2162 			" due to heavy memory load,"
2163 			" and it will restart"
2164 			" automatically.\n", mcp->mc_board_num,
2165 			    bank);
2166 		}
2167 	}
2168 }
2169 
2170 static void
2171 mc_check_errors_func(mc_opl_t *mcp)
2172 {
2173 	mc_rsaddr_info_t rsaddr_info;
2174 	int i, error_count = 0;
2175 	uint32_t stat, cntl;
2176 	int running;
2177 	int wrapped;
2178 	int ebk;
2179 
2180 	/*
2181 	 * scan errors.
2182 	 */
2183 	if (mcp->mc_status & MC_MEMORYLESS)
2184 		return;
2185 
2186 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2187 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2188 			if (MC_REWRITE_ACTIVE(mcp, i)) {
2189 				mc_process_rewrite(mcp, i);
2190 			}
2191 			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
2192 			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
2193 			running = cntl & MAC_CNTL_PTRL_START;
2194 			wrapped = cntl & MAC_CNTL_PTRL_ADD_MAX;
2195 
2196 			/* Compute the effective bank idx */
2197 			ebk = (IS_MIRROR(mcp, i)) ? MIRROR_IDX(i) : i;
2198 
2199 			if (mc_debug_show_all || stat) {
2200 				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
2201 				    mcp->mc_board_num, i, stat, cntl);
2202 			}
2203 
2204 			/*
2205 			 * Update stats and reset flag if the HW patrol
2206 			 * wrapped around in its scan.
2207 			 */
2208 			if (wrapped) {
2209 				MAC_CLEAR_MAX(mcp, i);
2210 				mcp->mc_period[ebk]++;
2211 				if (IS_MIRROR(mcp, i)) {
2212 					MC_LOG("mirror mc period %ld on "
2213 					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
2214 					    mcp->mc_board_num, i);
2215 				} else {
2216 					MC_LOG("mc period %ld on "
2217 					    "/LSB%d/B%d\n", mcp->mc_period[ebk],
2218 					    mcp->mc_board_num, i);
2219 				}
2220 			}
2221 
2222 			if (running) {
2223 				/*
2224 				 * Mac patrol HW is still running.
2225 				 * Normally when an error is detected,
2226 				 * the HW patrol will stop so that we
2227 				 * can collect error data for reporting.
2228 				 * Certain errors (MI errors) detected may not
2229 				 * cause the HW patrol to stop which is a
2230 				 * problem since we cannot read error data while
2231 				 * the HW patrol is running. SW is not allowed
2232 				 * to stop the HW patrol while it is running
2233 				 * as it may cause HW inconsistency. This is
2234 				 * described in a HW errata.
2235 				 * In situations where we detected errors
2236 				 * that may not cause the HW patrol to stop.
2237 				 * We speed up the HW patrol scanning in
2238 				 * the hope that it will find the 'real' PTRL
2239 				 * errors associated with the previous errors
2240 				 * causing the HW to finally stop so that we
2241 				 * can do the reporting.
2242 				 */
2243 				/*
2244 				 * Check to see if we did speed up
2245 				 * the HW patrol due to previous errors
2246 				 * detected that did not cause the patrol
2247 				 * to stop. We only do it if HW patrol scan
2248 				 * wrapped (counted as completing a 'period').
2249 				 */
2250 				if (mcp->mc_speedup_period[ebk] > 0) {
2251 					if (wrapped &&
2252 					    (--mcp->mc_speedup_period[ebk] ==
2253 					    0)) {
2254 						/*
2255 						 * We did try to speed up.
2256 						 * The speed up period has
2257 						 * expired and the HW patrol
2258 						 * is still running.  The
2259 						 * errors must be intermittent.
2260 						 * We have no choice but to
2261 						 * ignore them, reset the scan
2262 						 * speed to normal and clear
2263 						 * the MI error bits. For
2264 						 * mirror mode, we need to
2265 						 * clear errors on both banks.
2266 						 */
2267 						MC_LOG("Clearing MI errors\n");
2268 						MAC_CLEAR_ERRS(mcp, i,
2269 						    MAC_CNTL_MI_ERRS);
2270 
2271 						if (IS_MIRROR(mcp, i)) {
2272 							MC_LOG("Clearing "
2273 							    "Mirror MI errs\n");
2274 							MAC_CLEAR_ERRS(mcp,
2275 							    i^1,
2276 							    MAC_CNTL_MI_ERRS);
2277 						}
2278 					}
2279 				} else if (stat & MAC_STAT_MI_ERRS) {
2280 					/*
2281 					 * MI errors detected but we cannot
2282 					 * report them since the HW patrol
2283 					 * is still running.
2284 					 * We will attempt to speed up the
2285 					 * scanning and hopefully the HW
2286 					 * can detect PRTL errors at the same
2287 					 * location that cause the HW patrol
2288 					 * to stop.
2289 					 */
2290 					mcp->mc_speedup_period[ebk] = 2;
2291 					MAC_CMD(mcp, i, 0);
2292 				}
2293 			} else if (stat & (MAC_STAT_PTRL_ERRS |
2294 			    MAC_STAT_MI_ERRS)) {
2295 				/*
2296 				 * HW Patrol has stopped and we found errors.
2297 				 * Proceed to collect and report error info.
2298 				 */
2299 				mcp->mc_speedup_period[ebk] = 0;
2300 				rsaddr_info.mi_valid = 0;
2301 				rsaddr_info.mi_injectrestart = 0;
2302 				if (IS_MIRROR(mcp, i)) {
2303 					mc_error_handler_mir(mcp, i,
2304 					    &rsaddr_info);
2305 				} else {
2306 					mc_error_handler(mcp, i, &rsaddr_info);
2307 				}
2308 
2309 				error_count++;
2310 				(void) restart_patrol(mcp, i, &rsaddr_info);
2311 			} else {
2312 				/*
2313 				 * HW patrol scan has apparently stopped
2314 				 * but no errors detected/flagged.
2315 				 * Restart the HW patrol just to be sure.
2316 				 * In mirror mode, the odd bank might have
2317 				 * reported errors that caused the patrol to
2318 				 * stop. We'll defer the restart to the odd
2319 				 * bank in this case.
2320 				 */
2321 				if (!IS_MIRROR(mcp, i) || (i & 0x1))
2322 					(void) restart_patrol(mcp, i, NULL);
2323 			}
2324 		}
2325 	}
2326 	if (error_count > 0)
2327 		mcp->mc_last_error += error_count;
2328 	else
2329 		mcp->mc_last_error = 0;
2330 }
2331 
2332 /*
2333  * mc_polling -- Check errors for only one instance,
2334  * but process errors for all instances to make sure we drain the errors
2335  * faster than they can be accumulated.
2336  *
2337  * Polling on each board should be done only once per each
2338  * mc_patrol_interval_sec.  This is equivalent to setting mc_tick_left
2339  * to OPL_MAX_BOARDS and decrement by 1 on each timeout.
2340  * Once mc_tick_left becomes negative, the board becomes a candidate
2341  * for polling because it has waited for at least
2342  * mc_patrol_interval_sec's long.    If mc_timeout_period is calculated
2343  * differently, this has to be updated accordingly.
2344  */
2345 
2346 static void
2347 mc_polling(void)
2348 {
2349 	int i, scan_error;
2350 	mc_opl_t *mcp;
2351 
2352 
2353 	scan_error = 1;
2354 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2355 		mutex_enter(&mcmutex);
2356 		if ((mcp = mc_instances[i]) == NULL) {
2357 			mutex_exit(&mcmutex);
2358 			continue;
2359 		}
2360 		mutex_enter(&mcp->mc_lock);
2361 		mutex_exit(&mcmutex);
2362 		if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2363 			mutex_exit(&mcp->mc_lock);
2364 			continue;
2365 		}
2366 		if (scan_error && mcp->mc_tick_left <= 0) {
2367 			mc_check_errors_func((void *)mcp);
2368 			mcp->mc_tick_left = OPL_MAX_BOARDS;
2369 			scan_error = 0;
2370 		} else {
2371 			mcp->mc_tick_left--;
2372 		}
2373 		mc_process_scf_log(mcp);
2374 		mutex_exit(&mcp->mc_lock);
2375 	}
2376 }
2377 
2378 static void
2379 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr)
2380 {
2381 	maddr->ma_bd = mcp->mc_board_num;
2382 	maddr->ma_bank = bank;
2383 	maddr->ma_dimm_addr = 0;
2384 }
2385 
/*
 * Layout used by get_base_address() to read the "sb-mem-ranges" property:
 * a starting address followed by a size.
 */
typedef struct mc_mem_range {
	uint64_t	addr;	/* copied into mc_start_address */
	uint64_t	size;	/* copied into mc_size */
} mc_mem_range_t;
2390 
2391 static int
2392 get_base_address(mc_opl_t *mcp)
2393 {
2394 	mc_mem_range_t *mem_range;
2395 	int len;
2396 
2397 	if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2398 	    "sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) {
2399 		return (DDI_FAILURE);
2400 	}
2401 
2402 	mcp->mc_start_address = mem_range->addr;
2403 	mcp->mc_size = mem_range->size;
2404 
2405 	kmem_free(mem_range, len);
2406 	return (DDI_SUCCESS);
2407 }
2408 
/*
 * One entry of the "mc-addr" property: a bank number and the physical
 * address of that bank's MAC registers, split into two 32-bit halves.
 */
struct mc_addr_spec {
	uint32_t bank;
	uint32_t phys_hi;	/* upper 32 bits of the register address */
	uint32_t phys_lo;	/* lower 32 bits of the register address */
};

/*
 * Combine phys_hi/phys_lo of entry i of the mc_addr_spec array m into a
 * 64-bit physical address.  Both arguments are parenthesized so that
 * expressions such as REGS_PA(base + 1, n - 1) expand correctly.
 */
#define	REGS_PA(m, i)	((((uint64_t)(m)[(i)].phys_hi) << 32) | \
	(uint64_t)(m)[(i)].phys_lo)
2416 
/*
 * Names of the translation-table properties, indexed in the same order
 * as mcp->mc_trans_table[] when mc_board_add() loads them.
 */
static char *mc_tbl_name[] = {
	"cs0-mc-pa-trans-table",
	"cs1-mc-pa-trans-table"
};
2421 
2422 /*
2423  * This routine performs a rangecheck for a given PA
2424  * to see if it belongs to the memory range for this board.
2425  * Return 1 if it is valid (within the range) and 0 otherwise
2426  */
2427 static int
2428 mc_rangecheck_pa(mc_opl_t *mcp, uint64_t pa)
2429 {
2430 	if ((pa < mcp->mc_start_address) || (mcp->mc_start_address +
2431 	    mcp->mc_size <= pa))
2432 		return (0);
2433 	else
2434 		return (1);
2435 }
2436 
2437 static void
2438 mc_memlist_delete(struct memlist *mlist)
2439 {
2440 	struct memlist *ml;
2441 
2442 	for (ml = mlist; ml; ml = mlist) {
2443 		mlist = ml->ml_next;
2444 		kmem_free(ml, sizeof (struct memlist));
2445 	}
2446 }
2447 
2448 static struct memlist *
2449 mc_memlist_dup(struct memlist *mlist)
2450 {
2451 	struct memlist *hl = NULL, *tl, **mlp;
2452 
2453 	if (mlist == NULL)
2454 		return (NULL);
2455 
2456 	mlp = &hl;
2457 	tl = *mlp;
2458 	for (; mlist; mlist = mlist->ml_next) {
2459 		*mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP);
2460 		(*mlp)->ml_address = mlist->ml_address;
2461 		(*mlp)->ml_size = mlist->ml_size;
2462 		(*mlp)->ml_prev = tl;
2463 		tl = *mlp;
2464 		mlp = &((*mlp)->ml_next);
2465 	}
2466 	*mlp = NULL;
2467 
2468 	return (hl);
2469 }
2470 
2471 
/*
 * Remove the address span [base, base + len) from a memlist, modifying
 * the list in place.
 *
 * Entries that overlap the span are trimmed at the front, truncated at
 * the back, split in two (a new node is allocated with KM_SLEEP), or
 * unlinked and freed when nothing of them remains.  Returns the possibly
 * changed head of the list, or NULL when the list is or becomes empty.
 *
 * NOTE(review): the early exits on "end <= ml_address" assume the list is
 * sorted by ascending address -- confirm against the list's producer.
 */
static struct memlist *
mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
{
	uint64_t	end;
	struct memlist	*ml, *tl, *nlp;

	if (mlist == NULL)
		return (NULL);

	end = base + len;
	/* Span ends at or before the first entry, or is empty: no change. */
	if ((end <= mlist->ml_address) || (base == end))
		return (mlist);

	/* tl trails ml by one step; nlp is the node visited next. */
	for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
		uint64_t	mend;

		nlp = ml->ml_next;

		/* This entry starts at or after the span's end: done. */
		if (end <= ml->ml_address)
			break;

		mend = ml->ml_address + ml->ml_size;
		if (base < mend) {
			if (base <= ml->ml_address) {
				/*
				 * The span covers the front of this entry:
				 * move its start up to the span's end, leaving
				 * size 0 if the whole entry is covered.
				 */
				ml->ml_address = end;
				if (end >= mend)
					ml->ml_size = 0ull;
				else
					ml->ml_size = mend - ml->ml_address;
			} else {
				/*
				 * The span starts inside this entry: keep
				 * only the part below base.
				 */
				ml->ml_size = base - ml->ml_address;
				if (end < mend) {
					struct memlist	*nl;
					/*
					 * The span ends inside the entry too,
					 * so the entry is split: a new node
					 * holding [end, mend) is linked in
					 * right after ml and visited next.
					 */
					nl = kmem_alloc(sizeof (struct memlist),
					    KM_SLEEP);
					nl->ml_address = end;
					nl->ml_size = mend - nl->ml_address;
					if ((nl->ml_next = nlp) != NULL)
						nlp->ml_prev = nl;
					nl->ml_prev = ml;
					ml->ml_next = nl;
					nlp = nl;
				}
			}
			if (ml->ml_size == 0ull) {
				/* Nothing left of this entry: unlink, free. */
				if (ml == mlist) {
					/* Removing the head: advance mlist. */
					if ((mlist = nlp) != NULL)
						nlp->ml_prev = NULL;
					kmem_free(ml, sizeof (struct memlist));
					if (mlist == NULL)
						break;
					/*
					 * Point ml at the new head so the loop
					 * step (tl = ml, ml = nlp) resumes
					 * there.
					 */
					ml = nlp;
				} else {
					if ((tl->ml_next = nlp) != NULL)
						nlp->ml_prev = tl;
					kmem_free(ml, sizeof (struct memlist));
					/* Keep tl unchanged across the step. */
					ml = tl;
				}
			}
		}
	}

	return (mlist);
}
2539 
2540 static void
2541 mc_get_mlist(mc_opl_t *mcp)
2542 {
2543 	struct memlist *mlist;
2544 
2545 	memlist_read_lock();
2546 	mlist = mc_memlist_dup(phys_install);
2547 	memlist_read_unlock();
2548 
2549 	if (mlist) {
2550 		mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address);
2551 	}
2552 
2553 	if (mlist) {
2554 		uint64_t startpa, endpa;
2555 
2556 		startpa = mcp->mc_start_address + mcp->mc_size;
2557 		endpa = ptob(physmax + 1);
2558 		if (endpa > startpa) {
2559 			mlist = mc_memlist_del_span(mlist, startpa,
2560 			    endpa - startpa);
2561 		}
2562 	}
2563 
2564 	if (mlist) {
2565 		mcp->mlist = mlist;
2566 	}
2567 }
2568 
2569 int
2570 mc_board_add(mc_opl_t *mcp)
2571 {
2572 	struct mc_addr_spec *macaddr;
2573 	cs_status_t *cs_status;
2574 	int len, len1, i, bk, cc;
2575 	mc_rsaddr_info_t rsaddr;
2576 	uint32_t mirr;
2577 	int nbanks = 0;
2578 	uint64_t nbytes = 0;
2579 	int mirror_mode = 0;
2580 	int ret;
2581 
2582 	/*
2583 	 * Get configurations from "pseudo-mc" node which includes:
2584 	 * board# : LSB number
2585 	 * mac-addr : physical base address of MAC registers
2586 	 * csX-mac-pa-trans-table: translation table from DIMM address
2587 	 *			to physical address or vice versa.
2588 	 */
2589 	mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip,
2590 	    DDI_PROP_DONTPASS, "board#", -1);
2591 
2592 	if (mcp->mc_board_num == -1) {
2593 		return (DDI_FAILURE);
2594 	}
2595 
2596 	/*
2597 	 * Get start address in this CAB. It can be gotten from
2598 	 * "sb-mem-ranges" property.
2599 	 */
2600 
2601 	if (get_base_address(mcp) == DDI_FAILURE) {
2602 		return (DDI_FAILURE);
2603 	}
2604 	/* get mac-pa trans tables */
2605 	for (i = 0; i < MC_TT_CS; i++) {
2606 		len = MC_TT_ENTRIES;
2607 		cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip,
2608 		    DDI_PROP_DONTPASS, mc_tbl_name[i],
2609 		    (caddr_t)mcp->mc_trans_table[i], &len);
2610 
2611 		if (cc != DDI_SUCCESS) {
2612 			bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES);
2613 		}
2614 	}
2615 	mcp->mlist = NULL;
2616 
2617 	mc_get_mlist(mcp);
2618 
2619 	/* initialize bank informations */
2620 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2621 	    "mc-addr", (caddr_t)&macaddr, &len);
2622 	if (cc != DDI_SUCCESS) {
2623 		cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc);
2624 		return (DDI_FAILURE);
2625 	}
2626 
2627 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
2628 	    "cs-status", (caddr_t)&cs_status, &len1);
2629 
2630 	if (cc != DDI_SUCCESS) {
2631 		if (len > 0)
2632 			kmem_free(macaddr, len);
2633 		cmn_err(CE_WARN, "Cannot get cs-status. err=%d\n", cc);
2634 		return (DDI_FAILURE);
2635 	}
2636 	/* get the physical board number for a given logical board number */
2637 	mcp->mc_phys_board_num = mc_opl_get_physical_board(mcp->mc_board_num);
2638 
2639 	if (mcp->mc_phys_board_num < 0) {
2640 		if (len > 0)
2641 			kmem_free(macaddr, len);
2642 		cmn_err(CE_WARN, "Unable to obtain the physical board number");
2643 		return (DDI_FAILURE);
2644 	}
2645 
2646 	mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL);
2647 
2648 	for (i = 0; i < len1 / sizeof (cs_status_t); i++) {
2649 		nbytes += ((uint64_t)cs_status[i].cs_avail_hi << 32) |
2650 		    ((uint64_t)cs_status[i].cs_avail_low);
2651 	}
2652 	if (len1 > 0)
2653 		kmem_free(cs_status, len1);
2654 	nbanks = len / sizeof (struct mc_addr_spec);
2655 
2656 	if (nbanks > 0)
2657 		nbytes /= nbanks;
2658 	else {
2659 		/* No need to free macaddr because len must be 0 */
2660 		mcp->mc_status |= MC_MEMORYLESS;
2661 		return (DDI_SUCCESS);
2662 	}
2663 
2664 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2665 		mcp->mc_scf_retry[i] = 0;
2666 		mcp->mc_period[i] = 0;
2667 		mcp->mc_speedup_period[i] = 0;
2668 	}
2669 
2670 	/*
2671 	 * Get the memory size here. Let it be B (bytes).
2672 	 * Let T be the time in u.s. to scan 64 bytes.
2673 	 * If we want to complete 1 round of scanning in P seconds.
2674 	 *
2675 	 *	B * T * 10^(-6)	= P
2676 	 *	---------------
2677 	 *		64
2678 	 *
2679 	 *	T = P * 64 * 10^6
2680 	 *	    -------------
2681 	 *		B
2682 	 *
2683 	 *	  = P * 64 * 10^6
2684 	 *	    -------------
2685 	 *		B
2686 	 *
2687 	 *	The timing bits are set in PTRL_CNTL[28:26] where
2688 	 *
2689 	 *	0	- 1 m.s
2690 	 *	1	- 512 u.s.
2691 	 *	10	- 256 u.s.
2692 	 *	11	- 128 u.s.
2693 	 *	100	- 64 u.s.
2694 	 *	101	- 32 u.s.
2695 	 *	110	- 0 u.s.
2696 	 *	111	- reserved.
2697 	 *
2698 	 *
2699 	 *	a[0] = 110, a[1] = 101, ... a[6] = 0
2700 	 *
2701 	 *	cs-status property is int x 7
2702 	 *	0 - cs#
2703 	 *	1 - cs-status
2704 	 *	2 - cs-avail.hi
2705 	 *	3 - cs-avail.lo
2706 	 *	4 - dimm-capa.hi
2707 	 *	5 - dimm-capa.lo
2708 	 *	6 - #of dimms
2709 	 */
2710 
2711 	if (nbytes > 0) {
2712 		int i;
2713 		uint64_t ms;
2714 		ms = ((uint64_t)mc_scan_period * 64 * 1000000)/nbytes;
2715 		mcp->mc_speed = mc_scan_speeds[MC_MAX_SPEEDS - 1].mc_speeds;
2716 		for (i = 0; i < MC_MAX_SPEEDS - 1; i++) {
2717 			if (ms < mc_scan_speeds[i + 1].mc_period) {
2718 				mcp->mc_speed = mc_scan_speeds[i].mc_speeds;
2719 				break;
2720 			}
2721 		}
2722 	} else
2723 		mcp->mc_speed = 0;
2724 
2725 
2726 	for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) {
2727 		struct mc_bank *bankp;
2728 		mc_retry_info_t *retry;
2729 		uint32_t reg;
2730 		int k;
2731 
2732 		/*
2733 		 * setup bank
2734 		 */
2735 		bk = macaddr[i].bank;
2736 		bankp = &(mcp->mc_bank[bk]);
2737 		bankp->mcb_status = BANK_INSTALLED;
2738 		bankp->mcb_reg_base = REGS_PA(macaddr, i);
2739 
2740 		bankp->mcb_retry_freelist = NULL;
2741 		bankp->mcb_retry_pending = NULL;
2742 		bankp->mcb_active = NULL;
2743 		retry = &bankp->mcb_retry_infos[0];
2744 		for (k = 0; k < MC_RETRY_COUNT; k++, retry++) {
2745 			mc_retry_info_put(&bankp->mcb_retry_freelist, retry);
2746 		}
2747 
2748 		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk));
2749 		bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS);
2750 
2751 		/*
2752 		 * check if mirror mode
2753 		 */
2754 		mirr = LD_MAC_REG(MAC_MIRR(mcp, bk));
2755 
2756 		if (mirr & MAC_MIRR_MIRROR_MODE) {
2757 			MC_LOG("Mirror -> /LSB%d/B%d\n", mcp->mc_board_num,
2758 			    bk);
2759 			bankp->mcb_status |= BANK_MIRROR_MODE;
2760 			mirror_mode = 1;
2761 			/*
2762 			 * The following bit is only used for
2763 			 * error injection.  We should clear it
2764 			 */
2765 			if (mirr & MAC_MIRR_BANK_EXCLUSIVE)
2766 				ST_MAC_REG(MAC_MIRR(mcp, bk), 0);
2767 		}
2768 
2769 		/*
2770 		 * restart if not mirror mode or the other bank
2771 		 * of the mirror is not running
2772 		 */
2773 		if (!(mirr & MAC_MIRR_MIRROR_MODE) ||
2774 		    !(mcp->mc_bank[bk^1].mcb_status & BANK_PTRL_RUNNING)) {
2775 			MC_LOG("Starting up /LSB%d/B%d\n", mcp->mc_board_num,
2776 			    bk);
2777 			get_ptrl_start_address(mcp, bk, &rsaddr.mi_restartaddr);
2778 			rsaddr.mi_valid = 0;
2779 			rsaddr.mi_injectrestart = 0;
2780 			(void) restart_patrol(mcp, bk, &rsaddr);
2781 		} else {
2782 			MC_LOG("Not starting up /LSB%d/B%d\n",
2783 			    mcp->mc_board_num, bk);
2784 		}
2785 		bankp->mcb_status |= BANK_PTRL_RUNNING;
2786 	}
2787 	if (len > 0)
2788 		kmem_free(macaddr, len);
2789 
2790 	ret = ndi_prop_update_int(DDI_DEV_T_NONE, mcp->mc_dip, "mirror-mode",
2791 	    mirror_mode);
2792 	if (ret != DDI_PROP_SUCCESS) {
2793 		cmn_err(CE_WARN, "Unable to update mirror-mode property");
2794 	}
2795 
2796 	mcp->mc_dimm_list = mc_get_dimm_list(mcp);
2797 
2798 	/*
2799 	 * set interval in HZ.
2800 	 */
2801 	mcp->mc_last_error = 0;
2802 
2803 	/* restart memory patrol checking */
2804 	mcp->mc_status |= MC_POLL_RUNNING;
2805 
2806 	return (DDI_SUCCESS);
2807 }
2808 
2809 int
2810 mc_board_del(mc_opl_t *mcp)
2811 {
2812 	int i;
2813 	scf_log_t *p;
2814 
2815 	/*
2816 	 * cleanup mac state
2817 	 */
2818 	mutex_enter(&mcp->mc_lock);
2819 	if (mcp->mc_status & MC_MEMORYLESS) {
2820 		mutex_exit(&mcp->mc_lock);
2821 		mutex_destroy(&mcp->mc_lock);
2822 		return (DDI_SUCCESS);
2823 	}
2824 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2825 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2826 			mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED;
2827 		}
2828 	}
2829 
2830 	/* stop memory patrol checking */
2831 	mcp->mc_status &= ~MC_POLL_RUNNING;
2832 
2833 	/* just throw away all the scf logs */
2834 	for (i = 0; i < BANKNUM_PER_SB; i++) {
2835 		while ((p = mcp->mc_scf_log[i]) != NULL) {
2836 			mcp->mc_scf_log[i] = p->sl_next;
2837 			mcp->mc_scf_total[i]--;
2838 			kmem_free(p, sizeof (scf_log_t));
2839 		}
2840 	}
2841 
2842 	if (mcp->mlist)
2843 		mc_memlist_delete(mcp->mlist);
2844 
2845 	if (mcp->mc_dimm_list)
2846 		mc_free_dimm_list(mcp->mc_dimm_list);
2847 
2848 	mutex_exit(&mcp->mc_lock);
2849 
2850 	mutex_destroy(&mcp->mc_lock);
2851 	return (DDI_SUCCESS);
2852 }
2853 
2854 int
2855 mc_suspend(mc_opl_t *mcp, uint32_t flag)
2856 {
2857 	/* stop memory patrol checking */
2858 	mutex_enter(&mcp->mc_lock);
2859 	if (mcp->mc_status & MC_MEMORYLESS) {
2860 		mutex_exit(&mcp->mc_lock);
2861 		return (DDI_SUCCESS);
2862 	}
2863 
2864 	mcp->mc_status &= ~MC_POLL_RUNNING;
2865 
2866 	mcp->mc_status |= flag;
2867 	mutex_exit(&mcp->mc_lock);
2868 
2869 	return (DDI_SUCCESS);
2870 }
2871 
2872 void
2873 opl_mc_update_mlist(void)
2874 {
2875 	int i;
2876 	mc_opl_t *mcp;
2877 
2878 	/*
2879 	 * memory information is not updated until
2880 	 * the post attach/detach stage during DR.
2881 	 * This interface is used by dr_mem to inform
2882 	 * mc-opl to update the mlist.
2883 	 */
2884 
2885 	mutex_enter(&mcmutex);
2886 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2887 		if ((mcp = mc_instances[i]) == NULL)
2888 			continue;
2889 		mutex_enter(&mcp->mc_lock);
2890 		if (mcp->mlist)
2891 			mc_memlist_delete(mcp->mlist);
2892 		mcp->mlist = NULL;
2893 		mc_get_mlist(mcp);
2894 		mutex_exit(&mcp->mc_lock);
2895 	}
2896 	mutex_exit(&mcmutex);
2897 }
2898 
2899 /* caller must clear the SUSPEND bits or this will do nothing */
2900 
2901 int
2902 mc_resume(mc_opl_t *mcp, uint32_t flag)
2903 {
2904 	int i;
2905 	uint64_t basepa;
2906 
2907 	mutex_enter(&mcp->mc_lock);
2908 	if (mcp->mc_status & MC_MEMORYLESS) {
2909 		mutex_exit(&mcp->mc_lock);
2910 		return (DDI_SUCCESS);
2911 	}
2912 	basepa = mcp->mc_start_address;
2913 	if (get_base_address(mcp) == DDI_FAILURE) {
2914 		mutex_exit(&mcp->mc_lock);
2915 		return (DDI_FAILURE);
2916 	}
2917 
2918 	if (basepa != mcp->mc_start_address) {
2919 		if (mcp->mlist)
2920 			mc_memlist_delete(mcp->mlist);
2921 		mcp->mlist = NULL;
2922 		mc_get_mlist(mcp);
2923 	}
2924 
2925 	mcp->mc_status &= ~flag;
2926 
2927 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
2928 		mutex_exit(&mcp->mc_lock);
2929 		return (DDI_SUCCESS);
2930 	}
2931 
2932 	if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2933 		/* restart memory patrol checking */
2934 		mcp->mc_status |= MC_POLL_RUNNING;
2935 		for (i = 0; i < BANKNUM_PER_SB; i++) {
2936 			if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2937 				mc_check_errors_func(mcp);
2938 			}
2939 		}
2940 	}
2941 	mutex_exit(&mcp->mc_lock);
2942 
2943 	return (DDI_SUCCESS);
2944 }
2945 
2946 static mc_opl_t *
2947 mc_pa_to_mcp(uint64_t pa)
2948 {
2949 	mc_opl_t *mcp;
2950 	int i;
2951 
2952 	ASSERT(MUTEX_HELD(&mcmutex));
2953 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
2954 		if ((mcp = mc_instances[i]) == NULL)
2955 			continue;
2956 		/* if mac patrol is suspended, we cannot rely on it */
2957 		if (!(mcp->mc_status & MC_POLL_RUNNING) ||
2958 		    (mcp->mc_status & MC_SOFT_SUSPENDED))
2959 			continue;
2960 		if (mc_rangecheck_pa(mcp, pa)) {
2961 			return (mcp);
2962 		}
2963 	}
2964 	return (NULL);
2965 }
2966 
2967 /*
2968  * Get Physical Board number from Logical one.
2969  */
2970 static int
2971 mc_opl_get_physical_board(int sb)
2972 {
2973 	if (&opl_get_physical_board) {
2974 		return (opl_get_physical_board(sb));
2975 	}
2976 
2977 	cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n");
2978 	return (-1);
2979 }
2980 
2981 /* ARGSUSED */
2982 int
2983 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen,
2984 	int *lenp)
2985 {
2986 	int i;
2987 	int j;
2988 	int sb;
2989 	int bank;
2990 	int cs;
2991 	int rv = 0;
2992 	mc_opl_t *mcp;
2993 	char memb_num;
2994 
2995 	mutex_enter(&mcmutex);
2996 
2997 	if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) ||
2998 	    (!pa_is_valid(mcp, flt_addr))) {
2999 		mutex_exit(&mcmutex);
3000 		if (snprintf(buf, buflen, "UNKNOWN") >= buflen) {
3001 			return (ENOSPC);
3002 		} else {
3003 			if (lenp)
3004 				*lenp = strlen(buf);
3005 		}
3006 		return (0);
3007 	}
3008 
3009 	bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address);
3010 	sb = mcp->mc_phys_board_num;
3011 	cs = pa_to_cs(mcp, flt_addr - mcp->mc_start_address);
3012 
3013 	if (sb == -1) {
3014 		mutex_exit(&mcmutex);
3015 		return (ENXIO);
3016 	}
3017 
3018 	switch (plat_model) {
3019 	case MODEL_DC:
3020 		i = BD_BK_SLOT_TO_INDEX(0, bank, 0);
3021 		j = (cs == 0) ? i : i + 2;
3022 		(void) snprintf(buf, buflen, "/%s%02d/MEM%s MEM%s",
3023 		    model_names[plat_model].unit_name, sb,
3024 		    mc_dc_dimm_unum_table[j],
3025 		    mc_dc_dimm_unum_table[j + 1]);
3026 		break;
3027 	case MODEL_FF2:
3028 	case MODEL_FF1:
3029 		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
3030 		j = (cs == 0) ? i : i + 2;
3031 		memb_num = mc_ff_dimm_unum_table[i][0];
3032 		(void) snprintf(buf, buflen, "/%s/%s%c/MEM%s MEM%s",
3033 		    model_names[plat_model].unit_name,
3034 		    model_names[plat_model].mem_name, memb_num,
3035 		    &mc_ff_dimm_unum_table[j][1],
3036 		    &mc_ff_dimm_unum_table[j + 1][1]);
3037 		break;
3038 	case MODEL_IKKAKU:
3039 		i = BD_BK_SLOT_TO_INDEX(sb, bank, 0);
3040 		j = (cs == 0) ? i : i + 2;
3041 		(void) snprintf(buf, buflen, "/%s/MEM%s MEM%s",
3042 		    model_names[plat_model].unit_name,
3043 		    &mc_ff_dimm_unum_table[j][1],
3044 		    &mc_ff_dimm_unum_table[j + 1][1]);
3045 		break;
3046 	default:
3047 		rv = ENXIO;
3048 	}
3049 	if (lenp) {
3050 		*lenp = strlen(buf);
3051 	}
3052 	mutex_exit(&mcmutex);
3053 	return (rv);
3054 }
3055 
3056 int
3057 opl_mc_suspend(void)
3058 {
3059 	mc_opl_t *mcp;
3060 	int i;
3061 
3062 	mutex_enter(&mcmutex);
3063 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3064 		if ((mcp = mc_instances[i]) == NULL)
3065 			continue;
3066 		(void) mc_suspend(mcp, MC_SOFT_SUSPENDED);
3067 	}
3068 	mutex_exit(&mcmutex);
3069 
3070 	return (0);
3071 }
3072 
3073 int
3074 opl_mc_resume(void)
3075 {
3076 	mc_opl_t *mcp;
3077 	int i;
3078 
3079 	mutex_enter(&mcmutex);
3080 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3081 		if ((mcp = mc_instances[i]) == NULL)
3082 			continue;
3083 		(void) mc_resume(mcp, MC_SOFT_SUSPENDED);
3084 	}
3085 	mutex_exit(&mcmutex);
3086 
3087 	return (0);
3088 }
3089 static void
3090 insert_mcp(mc_opl_t *mcp)
3091 {
3092 	mutex_enter(&mcmutex);
3093 	if (mc_instances[mcp->mc_board_num] != NULL) {
3094 		MC_LOG("mc-opl instance for board# %d already exists\n",
3095 		    mcp->mc_board_num);
3096 	}
3097 	mc_instances[mcp->mc_board_num] = mcp;
3098 	mutex_exit(&mcmutex);
3099 }
3100 
3101 static void
3102 delete_mcp(mc_opl_t *mcp)
3103 {
3104 	mutex_enter(&mcmutex);
3105 	mc_instances[mcp->mc_board_num] = 0;
3106 	mutex_exit(&mcmutex);
3107 }
3108 
3109 /* Error injection interface */
3110 
3111 static void
3112 mc_lock_va(uint64_t pa, caddr_t new_va)
3113 {
3114 	tte_t tte;
3115 
3116 	vtag_flushpage(new_va, (uint64_t)ksfmmup);
3117 	sfmmu_memtte(&tte, pa >> PAGESHIFT, PROC_DATA|HAT_NOSYNC, TTE8K);
3118 	tte.tte_intlo |= TTE_LCK_INT;
3119 	sfmmu_dtlb_ld_kva(new_va, &tte);
3120 }
3121 
/*
 * Counterpart of mc_lock_va(): flush the kernel-context entry for va.
 */
static void
mc_unlock_va(caddr_t va)
{
	vtag_flushpage(va, (uint64_t)ksfmmup);
}
3127 
3128 /* ARGSUSED */
3129 int
3130 mc_inject_error(int error_type, uint64_t pa, uint32_t flags)
3131 {
3132 	mc_opl_t *mcp;
3133 	int bank;
3134 	uint32_t dimm_addr;
3135 	uint32_t cntl;
3136 	mc_rsaddr_info_t rsaddr;
3137 	uint32_t data, stat;
3138 	int both_sides = 0;
3139 	uint64_t pa0;
3140 	int extra_injection_needed = 0;
3141 	extern void cpu_flush_ecache(void);
3142 
3143 	MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags);
3144 
3145 	mutex_enter(&mcmutex);
3146 	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
3147 		mutex_exit(&mcmutex);
3148 		MC_LOG("mc_inject_error: invalid pa\n");
3149 		return (ENOTSUP);
3150 	}
3151 
3152 	mutex_enter(&mcp->mc_lock);
3153 	mutex_exit(&mcmutex);
3154 
3155 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
3156 		mutex_exit(&mcp->mc_lock);
3157 		MC_LOG("mc-opl has been suspended.  No error injection.\n");
3158 		return (EBUSY);
3159 	}
3160 
3161 	/* convert pa to offset within the board */
3162 	MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address);
3163 
3164 	if (!pa_is_valid(mcp, pa)) {
3165 		mutex_exit(&mcp->mc_lock);
3166 		return (EINVAL);
3167 	}
3168 
3169 	pa0 = pa - mcp->mc_start_address;
3170 
3171 	bank = pa_to_bank(mcp, pa0);
3172 
3173 	if (flags & MC_INJECT_FLAG_OTHER)
3174 		bank = bank ^ 1;
3175 
3176 	if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) {
3177 		mutex_exit(&mcp->mc_lock);
3178 		MC_LOG("Not mirror mode\n");
3179 		return (EINVAL);
3180 	}
3181 
3182 	dimm_addr = pa_to_dimm(mcp, pa0);
3183 
3184 	MC_LOG("injecting error to /LSB%d/B%d/%x\n", mcp->mc_board_num, bank,
3185 	    dimm_addr);
3186 
3187 
3188 	switch (error_type) {
3189 	case MC_INJECT_INTERMITTENT_MCE:
3190 	case MC_INJECT_PERMANENT_MCE:
3191 	case MC_INJECT_MUE:
3192 		both_sides = 1;
3193 	}
3194 
3195 	if (flags & MC_INJECT_FLAG_RESET)
3196 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0);
3197 
3198 	ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK);
3199 
3200 	if (both_sides) {
3201 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0);
3202 		ST_MAC_REG(MAC_EG_ADD(mcp, bank^1), dimm_addr &
3203 		    MAC_EG_ADD_MASK);
3204 	}
3205 
3206 	switch (error_type) {
3207 	case MC_INJECT_SUE:
3208 		extra_injection_needed = 1;
3209 		/*FALLTHROUGH*/
3210 	case MC_INJECT_UE:
3211 	case MC_INJECT_MUE:
3212 		if (flags & MC_INJECT_FLAG_PATH) {
3213 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3214 			    MAC_EG_FORCE_READ16 | MAC_EG_RDERR_ONCE;
3215 		} else {
3216 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR00 |
3217 			    MAC_EG_FORCE_DERR16 | MAC_EG_DERR_ONCE;
3218 		}
3219 		flags |= MC_INJECT_FLAG_ST;
3220 		break;
3221 	case MC_INJECT_INTERMITTENT_CE:
3222 	case MC_INJECT_INTERMITTENT_MCE:
3223 		if (flags & MC_INJECT_FLAG_PATH) {
3224 			cntl = MAC_EG_ADD_FIX |MAC_EG_FORCE_READ00 |
3225 			    MAC_EG_RDERR_ONCE;
3226 		} else {
3227 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3228 			    MAC_EG_DERR_ONCE;
3229 		}
3230 		extra_injection_needed = 1;
3231 		flags |= MC_INJECT_FLAG_ST;
3232 		break;
3233 	case MC_INJECT_PERMANENT_CE:
3234 	case MC_INJECT_PERMANENT_MCE:
3235 		if (flags & MC_INJECT_FLAG_PATH) {
3236 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_READ00 |
3237 			    MAC_EG_RDERR_ALWAYS;
3238 		} else {
3239 			cntl = MAC_EG_ADD_FIX | MAC_EG_FORCE_DERR16 |
3240 			    MAC_EG_DERR_ALWAYS;
3241 		}
3242 		flags |= MC_INJECT_FLAG_ST;
3243 		break;
3244 	case MC_INJECT_CMPE:
3245 		data = 0xabcdefab;
3246 		stphys(pa, data);
3247 		cpu_flush_ecache();
3248 		MC_LOG("CMPE: writing data %x to %lx\n", data, pa);
3249 		ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE);
3250 		stphys(pa, data ^ 0xffffffff);
3251 		membar_sync();
3252 		cpu_flush_ecache();
3253 		ST_MAC_REG(MAC_MIRR(mcp, bank), 0);
3254 		MC_LOG("CMPE: write new data %xto %lx\n", data, pa);
3255 		cntl = 0;
3256 		break;
3257 	case MC_INJECT_NOP:
3258 		cntl = 0;
3259 		break;
3260 	default:
3261 		MC_LOG("mc_inject_error: invalid option\n");
3262 		cntl = 0;
3263 	}
3264 
3265 	if (cntl) {
3266 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK);
3267 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3268 
3269 		if (both_sides) {
3270 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3271 			    MAC_EG_SETUP_MASK);
3272 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3273 		}
3274 	}
3275 
3276 	/*
3277 	 * For all injection cases except compare error, we
3278 	 * must write to the PA to trigger the error.
3279 	 */
3280 
3281 	if (flags & MC_INJECT_FLAG_ST) {
3282 		data = 0xf0e0d0c0;
3283 		MC_LOG("Writing %x to %lx\n", data, pa);
3284 		stphys(pa, data);
3285 		cpu_flush_ecache();
3286 	}
3287 
3288 
3289 	if (flags & MC_INJECT_FLAG_LD) {
3290 		if (flags & MC_INJECT_FLAG_PREFETCH) {
3291 			/*
3292 			 * Use strong prefetch operation to
3293 			 * inject MI errors.
3294 			 */
3295 			page_t *pp;
3296 			extern void mc_prefetch(caddr_t);
3297 
3298 			MC_LOG("prefetch\n");
3299 
3300 			pp = page_numtopp_nolock(pa >> PAGESHIFT);
3301 			if (pp != NULL) {
3302 				caddr_t	va, va1;
3303 
3304 				va = ppmapin(pp, PROT_READ|PROT_WRITE,
3305 				    (caddr_t)-1);
3306 				kpreempt_disable();
3307 				mc_lock_va((uint64_t)pa, va);
3308 				va1 = va + (pa & (PAGESIZE - 1));
3309 				mc_prefetch(va1);
3310 				mc_unlock_va(va);
3311 				kpreempt_enable();
3312 				ppmapout(va);
3313 
3314 				/*
3315 				 * For MI errors, we need one extra
3316 				 * injection for HW patrol to stop.
3317 				 */
3318 				extra_injection_needed = 1;
3319 			} else {
3320 				cmn_err(CE_WARN, "Cannot find page structure"
3321 				    " for PA %lx\n", pa);
3322 			}
3323 		} else {
3324 			MC_LOG("Reading from %lx\n", pa);
3325 			data = ldphys(pa);
3326 			MC_LOG("data = %x\n", data);
3327 		}
3328 
3329 		if (extra_injection_needed) {
3330 			/*
3331 			 * These are the injection cases where the
3332 			 * requested injected errors will not cause the HW
3333 			 * patrol to stop. For these cases, we need to inject
3334 			 * an extra 'real' PTRL error to force the
3335 			 * HW patrol to stop so that we can report the
3336 			 * errors injected. Note that we cannot read
3337 			 * and report error status while the HW patrol
3338 			 * is running.
3339 			 */
3340 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank),
3341 			    cntl & MAC_EG_SETUP_MASK);
3342 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
3343 
3344 			if (both_sides) {
3345 				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
3346 				    MAC_EG_SETUP_MASK);
3347 				ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
3348 			}
3349 			data = 0xf0e0d0c0;
3350 			MC_LOG("Writing %x to %lx\n", data, pa);
3351 			stphys(pa, data);
3352 			cpu_flush_ecache();
3353 		}
3354 	}
3355 
3356 	if (flags & MC_INJECT_FLAG_RESTART) {
3357 		MC_LOG("Restart patrol\n");
3358 		rsaddr.mi_restartaddr.ma_bd = mcp->mc_board_num;
3359 		rsaddr.mi_restartaddr.ma_bank = bank;
3360 		rsaddr.mi_restartaddr.ma_dimm_addr = dimm_addr;
3361 		rsaddr.mi_valid = 1;
3362 		rsaddr.mi_injectrestart = 1;
3363 		(void) restart_patrol(mcp, bank, &rsaddr);
3364 	}
3365 
3366 	if (flags & MC_INJECT_FLAG_POLL) {
3367 		int running;
3368 		int ebank = (IS_MIRROR(mcp, bank)) ? MIRROR_IDX(bank) : bank;
3369 
3370 		MC_LOG("Poll patrol error\n");
3371 		stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank));
3372 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
3373 		running = cntl & MAC_CNTL_PTRL_START;
3374 
3375 		if (!running &&
3376 		    (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS))) {
3377 			/*
3378 			 * HW patrol stopped and we have errors to
3379 			 * report. Do it.
3380 			 */
3381 			mcp->mc_speedup_period[ebank] = 0;
3382 			rsaddr.mi_valid = 0;
3383 			rsaddr.mi_injectrestart = 0;
3384 			if (IS_MIRROR(mcp, bank)) {
3385 				mc_error_handler_mir(mcp, bank, &rsaddr);
3386 			} else {
3387 				mc_error_handler(mcp, bank, &rsaddr);
3388 			}
3389 
3390 			(void) restart_patrol(mcp, bank, &rsaddr);
3391 		} else {
3392 			/*
3393 			 * We are expecting to report injected
3394 			 * errors but the HW patrol is still running.
3395 			 * Speed up the scanning
3396 			 */
3397 			mcp->mc_speedup_period[ebank] = 2;
3398 			MAC_CMD(mcp, bank, 0);
3399 			(void) restart_patrol(mcp, bank, NULL);
3400 		}
3401 	}
3402 
3403 	mutex_exit(&mcp->mc_lock);
3404 	return (0);
3405 }
3406 
/*
 * mc_stphysio -- store a 32-bit value to the physical I/O address pa,
 * then load the same address back and log both accesses.
 */
void
mc_stphysio(uint64_t pa, uint32_t data)
{
	uint32_t readback;

	MC_LOG("0x%x -> pa(%lx)\n", data, pa);
	stphysio(pa, data);

	/* reading it back forces the write to be processed by mac patrol */
	readback = ldphysio(pa);
	MC_LOG("pa(%lx) = 0x%x\n", pa, readback);
}
3417 
/*
 * mc_ldphysio -- load a 32-bit value from the physical I/O address pa,
 * log it, and return it.
 */
uint32_t
mc_ldphysio(uint64_t pa)
{
	uint32_t val = ldphysio(pa);

	MC_LOG("pa(%lx) = 0x%x\n", pa, val);
	return (val);
}
3427 
3428 #define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
3429 
3430 /*
3431  * parse_unum_memory -- extract the board number and the DIMM name from
3432  * the unum.
3433  *
3434  * Return 0 for success and non-zero for a failure.
3435  */
3436 int
3437 parse_unum_memory(char *unum, int *board, char *dname)
3438 {
3439 	char *c;
3440 	char x, y, z;
3441 
3442 	if ((c = strstr(unum, "CMU")) != NULL) {
3443 		/* DC Model */
3444 		c += 3;
3445 		*board = (uint8_t)stoi(&c);
3446 		if ((c = strstr(c, "MEM")) == NULL) {
3447 			return (1);
3448 		}
3449 		c += 3;
3450 		if (strlen(c) < 3) {
3451 			return (2);
3452 		}
3453 		if ((!isdigit(c[0])) || (!(isdigit(c[1]))) ||
3454 		    ((c[2] != 'A') && (c[2] != 'B'))) {
3455 			return (3);
3456 		}
3457 		x = c[0];
3458 		y = c[1];
3459 		z = c[2];
3460 	} else if ((c = strstr(unum, "MBU_")) != NULL) {
3461 		/*  FF1/FF2/Ikkaku Model */
3462 		c += 4;
3463 		if ((c[0] != 'A') && (c[0] != 'B')) {
3464 			return (4);
3465 		}
3466 		if (plat_model == MODEL_IKKAKU) {
3467 			/* Ikkaku Model */
3468 			x = '0';
3469 			*board = 0;
3470 		} else {
3471 			/* FF1/FF2 Model */
3472 			if ((c = strstr(c, "MEMB")) == NULL) {
3473 				return (5);
3474 			}
3475 			c += 4;
3476 
3477 			x = c[0];
3478 			*board =  ((uint8_t)stoi(&c)) / 4;
3479 		}
3480 
3481 		if ((c = strstr(c, "MEM")) == NULL) {
3482 			return (6);
3483 		}
3484 		c += 3;
3485 		if (strlen(c) < 2) {
3486 			return (7);
3487 		}
3488 		if ((!isdigit(c[0])) || ((c[1] != 'A') && (c[1] != 'B'))) {
3489 			return (8);
3490 		}
3491 		y = c[0];
3492 		z = c[1];
3493 	} else {
3494 		return (9);
3495 	}
3496 	if (*board < 0) {
3497 		return (10);
3498 	}
3499 	dname[0] = x;
3500 	dname[1] = y;
3501 	dname[2] = z;
3502 	dname[3] = '\0';
3503 	return (0);
3504 }
3505 
3506 /*
3507  * mc_get_mem_sid_dimm -- Get the serial-ID for a given board and
3508  * the DIMM name.
3509  */
3510 int
3511 mc_get_mem_sid_dimm(mc_opl_t *mcp, char *dname, char *buf,
3512     int buflen, int *lenp)
3513 {
3514 	int		ret = ENODEV;
3515 	mc_dimm_info_t	*d = NULL;
3516 
3517 	if ((d = mcp->mc_dimm_list) == NULL) {
3518 		MC_LOG("mc_get_mem_sid_dimm: mc_dimm_list is NULL\n");
3519 		return (EINVAL);
3520 		}
3521 
3522 	for (; d != NULL; d = d->md_next) {
3523 		if (strcmp(d->md_dimmname, dname) == 0) {
3524 			break;
3525 		}
3526 	}
3527 	if (d != NULL) {
3528 		*lenp = strlen(d->md_serial) + strlen(d->md_partnum);
3529 		if (buflen <=  *lenp) {
3530 			cmn_err(CE_WARN, "mc_get_mem_sid_dimm: "
3531 			    "buflen is smaller than %d\n", *lenp);
3532 			ret = ENOSPC;
3533 		} else {
3534 			(void) snprintf(buf, buflen, "%s:%s",
3535 			    d->md_serial, d->md_partnum);
3536 			ret = 0;
3537 		}
3538 	}
3539 	MC_LOG("mc_get_mem_sid_dimm: Ret=%d Name=%s Serial-ID=%s\n",
3540 	    ret, dname, (ret == 0) ? buf : "");
3541 	return (ret);
3542 }
3543 
3544 int
3545 mc_set_mem_sid(mc_opl_t *mcp, char *buf, int buflen, int sb,
3546     int bank, uint32_t mf_type, uint32_t d_slot)
3547 {
3548 	int	lenp = buflen;
3549 	int	id;
3550 	int	ret;
3551 	char	*dimmnm;
3552 
3553 	if (mf_type == FLT_TYPE_INTERMITTENT_CE ||
3554 	    mf_type == FLT_TYPE_PERMANENT_CE) {
3555 		if (plat_model == MODEL_DC) {
3556 			/*
3557 			 * All DC models
3558 			 */
3559 			id = BD_BK_SLOT_TO_INDEX(0, bank, d_slot);
3560 			dimmnm = mc_dc_dimm_unum_table[id];
3561 		} else {
3562 			/*
3563 			 * All FF and Ikkaku models
3564 			 */
3565 			id = BD_BK_SLOT_TO_INDEX(sb, bank, d_slot);
3566 			dimmnm = mc_ff_dimm_unum_table[id];
3567 		}
3568 		if ((ret = mc_get_mem_sid_dimm(mcp, dimmnm, buf, buflen,
3569 		    &lenp)) != 0) {
3570 			return (ret);
3571 		}
3572 	} else {
3573 		return (1);
3574 	}
3575 
3576 	return (0);
3577 }
3578 
3579 /*
3580  * mc_get_mem_sid -- get the DIMM serial-ID corresponding to the unum.
3581  */
3582 int
3583 mc_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3584 {
3585 	int	i;
3586 	int	ret = ENODEV;
3587 	int	board;
3588 	char	dname[MCOPL_MAX_DIMMNAME + 1];
3589 	mc_opl_t *mcp;
3590 
3591 	MC_LOG("mc_get_mem_sid: unum=%s buflen=%d\n", unum, buflen);
3592 	if ((ret = parse_unum_memory(unum, &board, dname)) != 0) {
3593 		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3594 		    unum, ret);
3595 		return (EINVAL);
3596 	}
3597 
3598 	if (board < 0) {
3599 		MC_LOG("mc_get_mem_sid: Invalid board=%d dimm=%s\n",
3600 		    board, dname);
3601 		return (EINVAL);
3602 	}
3603 
3604 	mutex_enter(&mcmutex);
3605 	/*
3606 	 * return ENOENT if we can not find the matching board.
3607 	 */
3608 	ret = ENOENT;
3609 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3610 		if ((mcp = mc_instances[i]) == NULL)
3611 			continue;
3612 		mutex_enter(&mcp->mc_lock);
3613 		if (mcp->mc_phys_board_num != board) {
3614 			mutex_exit(&mcp->mc_lock);
3615 			continue;
3616 		}
3617 		ret = mc_get_mem_sid_dimm(mcp, dname, buf, buflen, lenp);
3618 		if (ret == 0) {
3619 			mutex_exit(&mcp->mc_lock);
3620 			break;
3621 		}
3622 		mutex_exit(&mcp->mc_lock);
3623 	}
3624 	mutex_exit(&mcmutex);
3625 	return (ret);
3626 }
3627 
3628 /*
3629  * mc_get_mem_offset -- get the offset in a DIMM for a given physical address.
3630  */
3631 int
3632 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
3633 {
3634 	int		i;
3635 	int		ret = ENODEV;
3636 	mc_addr_t	maddr;
3637 	mc_opl_t	*mcp;
3638 
3639 	mutex_enter(&mcmutex);
3640 	for (i = 0; ((i < OPL_MAX_BOARDS) && (ret != 0)); i++) {
3641 		if ((mcp = mc_instances[i]) == NULL)
3642 			continue;
3643 		mutex_enter(&mcp->mc_lock);
3644 		if (!pa_is_valid(mcp, paddr)) {
3645 			mutex_exit(&mcp->mc_lock);
3646 			continue;
3647 		}
3648 		if (pa_to_maddr(mcp, paddr, &maddr) == 0) {
3649 			*offp = maddr.ma_dimm_addr;
3650 			ret = 0;
3651 		}
3652 		mutex_exit(&mcp->mc_lock);
3653 	}
3654 	mutex_exit(&mcmutex);
3655 	MC_LOG("mc_get_mem_offset: Ret=%d paddr=0x%lx offset=0x%lx\n",
3656 	    ret, paddr, *offp);
3657 	return (ret);
3658 }
3659 
3660 /*
3661  * dname_to_bankslot - Get the bank and slot number from the DIMM name.
3662  */
3663 int
3664 dname_to_bankslot(char *dname, int *bank, int *slot)
3665 {
3666 	int i;
3667 	int tsz;
3668 	char **tbl;
3669 
3670 	if (plat_model == MODEL_DC) {
3671 		/*
3672 		 * All DC models
3673 		 */
3674 		tbl = mc_dc_dimm_unum_table;
3675 		tsz = OPL_MAX_DIMMS;
3676 	} else {
3677 		/*
3678 		 * All FF and Ikkaku models
3679 		 */
3680 		tbl = mc_ff_dimm_unum_table;
3681 		tsz = 2 * OPL_MAX_DIMMS;
3682 	}
3683 
3684 	for (i = 0; i < tsz; i++) {
3685 		if (strcmp(dname,  tbl[i]) == 0) {
3686 			break;
3687 		}
3688 	}
3689 	if (i == tsz) {
3690 		return (1);
3691 	}
3692 	*bank = INDEX_TO_BANK(i);
3693 	*slot = INDEX_TO_SLOT(i);
3694 	return (0);
3695 }
3696 
3697 /*
3698  * mc_get_mem_addr -- get the physical address of a DIMM corresponding
3699  * to the unum and sid.
3700  */
3701 int
3702 mc_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *paddr)
3703 {
3704 	int	board;
3705 	int	bank;
3706 	int	slot;
3707 	int	i;
3708 	int	ret = ENODEV;
3709 	char	dname[MCOPL_MAX_DIMMNAME + 1];
3710 	mc_addr_t maddr;
3711 	mc_opl_t *mcp;
3712 
3713 	MC_LOG("mc_get_mem_addr: unum=%s sid=%s offset=0x%lx\n",
3714 	    unum, sid, offset);
3715 	if (parse_unum_memory(unum, &board, dname) != 0) {
3716 		MC_LOG("mc_get_mem_sid: unum(%s) parsing failed ret=%d\n",
3717 		    unum, ret);
3718 		return (EINVAL);
3719 	}
3720 
3721 	if (board < 0) {
3722 		MC_LOG("mc_get_mem_addr: Invalid board=%d dimm=%s\n",
3723 		    board, dname);
3724 		return (EINVAL);
3725 	}
3726 
3727 	mutex_enter(&mcmutex);
3728 	for (i = 0; i < OPL_MAX_BOARDS; i++) {
3729 		if ((mcp = mc_instances[i]) == NULL)
3730 			continue;
3731 		mutex_enter(&mcp->mc_lock);
3732 		if (mcp->mc_phys_board_num != board) {
3733 			mutex_exit(&mcp->mc_lock);
3734 			continue;
3735 		}
3736 
3737 		ret = dname_to_bankslot(dname, &bank, &slot);
3738 		MC_LOG("mc_get_mem_addr: bank=%d slot=%d\n", bank, slot);
3739 		if (ret != 0) {
3740 			MC_LOG("mc_get_mem_addr: dname_to_bankslot failed\n");
3741 			ret = ENODEV;
3742 		} else {
3743 			maddr.ma_bd = mcp->mc_board_num;
3744 			maddr.ma_bank =  bank;
3745 			maddr.ma_dimm_addr = offset;
3746 			ret = mcaddr_to_pa(mcp, &maddr, paddr);
3747 			if (ret != 0) {
3748 				MC_LOG("mc_get_mem_addr: "
3749 				    "mcaddr_to_pa failed\n");
3750 				ret = ENODEV;
3751 				mutex_exit(&mcp->mc_lock);
3752 				continue;
3753 			}
3754 			mutex_exit(&mcp->mc_lock);
3755 			break;
3756 		}
3757 		mutex_exit(&mcp->mc_lock);
3758 	}
3759 	mutex_exit(&mcmutex);
3760 	MC_LOG("mc_get_mem_addr: Ret=%d, Paddr=0x%lx\n", ret, *paddr);
3761 	return (ret);
3762 }
3763 
3764 static void
3765 mc_free_dimm_list(mc_dimm_info_t *d)
3766 {
3767 	mc_dimm_info_t *next;
3768 
3769 	while (d != NULL) {
3770 		next = d->md_next;
3771 		kmem_free(d, sizeof (mc_dimm_info_t));
3772 		d = next;
3773 	}
3774 }
3775 
3776 /*
3777  * mc_get_dimm_list -- get the list of dimms with serial-id info
3778  * from the SP.
3779  */
3780 mc_dimm_info_t *
3781 mc_get_dimm_list(mc_opl_t *mcp)
3782 {
3783 	uint32_t	bufsz;
3784 	uint32_t	maxbufsz;
3785 	int		ret;
3786 	int		sexp;
3787 	board_dimm_info_t *bd_dimmp;
3788 	mc_dimm_info_t	*dimm_list = NULL;
3789 
3790 	maxbufsz = bufsz = sizeof (board_dimm_info_t) +
3791 	    ((MCOPL_MAX_DIMMNAME +  MCOPL_MAX_SERIAL +
3792 	    MCOPL_MAX_PARTNUM) * OPL_MAX_DIMMS);
3793 
3794 	bd_dimmp = (board_dimm_info_t *)kmem_alloc(bufsz, KM_SLEEP);
3795 	ret = scf_get_dimminfo(mcp->mc_board_num, (void *)bd_dimmp, &bufsz);
3796 
3797 	MC_LOG("mc_get_dimm_list:  scf_service_getinfo returned=%d\n", ret);
3798 	if (ret == 0) {
3799 		sexp = sizeof (board_dimm_info_t) +
3800 		    ((bd_dimmp->bd_dnamesz +  bd_dimmp->bd_serialsz +
3801 		    bd_dimmp->bd_partnumsz) * bd_dimmp->bd_numdimms);
3802 
3803 		if ((bd_dimmp->bd_version == OPL_DIMM_INFO_VERSION) &&
3804 		    (bd_dimmp->bd_dnamesz <= MCOPL_MAX_DIMMNAME) &&
3805 		    (bd_dimmp->bd_serialsz <= MCOPL_MAX_SERIAL) &&
3806 		    (bd_dimmp->bd_partnumsz <= MCOPL_MAX_PARTNUM) &&
3807 		    (sexp <= bufsz)) {
3808 
3809 #ifdef DEBUG
3810 			if (oplmc_debug)
3811 				mc_dump_dimm_info(bd_dimmp);
3812 #endif
3813 			dimm_list = mc_prepare_dimmlist(bd_dimmp);
3814 
3815 		} else {
3816 			cmn_err(CE_WARN, "DIMM info version mismatch\n");
3817 		}
3818 	}
3819 	kmem_free(bd_dimmp, maxbufsz);
3820 	MC_LOG("mc_get_dimm_list: dimmlist=0x%p\n", (void *)dimm_list);
3821 	return (dimm_list);
3822 }
3823 
3824 /*
3825  * mc_prepare_dimmlist - Prepare the dimm list from the information
3826  * received from the SP.
3827  */
3828 mc_dimm_info_t *
3829 mc_prepare_dimmlist(board_dimm_info_t *bd_dimmp)
3830 {
3831 	char	*dimm_name;
3832 	char	*serial;
3833 	char	*part;
3834 	int	dimm;
3835 	int	dnamesz = bd_dimmp->bd_dnamesz;
3836 	int	sersz = bd_dimmp->bd_serialsz;
3837 	int	partsz = bd_dimmp->bd_partnumsz;
3838 	mc_dimm_info_t	*dimm_list = NULL;
3839 	mc_dimm_info_t	*d;
3840 
3841 	dimm_name = (char *)(bd_dimmp + 1);
3842 	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
3843 
3844 		d = (mc_dimm_info_t *)kmem_alloc(sizeof (mc_dimm_info_t),
3845 		    KM_SLEEP);
3846 
3847 		bcopy(dimm_name, d->md_dimmname, dnamesz);
3848 		d->md_dimmname[dnamesz] = 0;
3849 
3850 		serial = dimm_name + dnamesz;
3851 		bcopy(serial, d->md_serial, sersz);
3852 		d->md_serial[sersz] = 0;
3853 
3854 		part = serial + sersz;
3855 		bcopy(part, d->md_partnum, partsz);
3856 		d->md_partnum[partsz] = 0;
3857 
3858 		d->md_next = dimm_list;
3859 		dimm_list = d;
3860 		dimm_name = part + partsz;
3861 	}
3862 	return (dimm_list);
3863 }
3864 
3865 static int
3866 mc_get_mem_fmri(mc_flt_page_t *fpag, char **unum)
3867 {
3868 	if (fpag->fmri_addr == 0 || fpag->fmri_sz > MEM_FMRI_MAX_BUFSIZE)
3869 		return (EINVAL);
3870 
3871 	*unum = kmem_alloc(fpag->fmri_sz, KM_SLEEP);
3872 	if (copyin((void *)fpag->fmri_addr, *unum, fpag->fmri_sz) != 0) {
3873 		kmem_free(*unum, fpag->fmri_sz);
3874 		return (EFAULT);
3875 	}
3876 	return (0);
3877 }
3878 
3879 static int
3880 mc_scf_log_event(mc_flt_page_t *flt_pag)
3881 {
3882 	mc_opl_t *mcp;
3883 	int board, bank, slot;
3884 	int len, rv = 0;
3885 	char *unum, *sid;
3886 	char dname[MCOPL_MAX_DIMMNAME + 1];
3887 	size_t sid_sz;
3888 	uint64_t pa;
3889 	mc_flt_stat_t flt_stat;
3890 
3891 	if ((sid_sz = cpu_get_name_bufsize()) == 0)
3892 		return (ENOTSUP);
3893 
3894 	if ((rv = mc_get_mem_fmri(flt_pag, &unum)) != 0) {
3895 		MC_LOG("mc_scf_log_event: mc_get_mem_fmri failed\n");
3896 		return (rv);
3897 	}
3898 
3899 	sid = kmem_zalloc(sid_sz, KM_SLEEP);
3900 
3901 	if ((rv = mc_get_mem_sid(unum, sid, sid_sz, &len)) != 0) {
3902 		MC_LOG("mc_scf_log_event: mc_get_mem_sid failed\n");
3903 		goto out;
3904 	}
3905 
3906 	if ((rv = mc_get_mem_addr(unum, sid, (uint64_t)flt_pag->err_add,
3907 	    &pa)) != 0) {
3908 		MC_LOG("mc_scf_log_event: mc_get_mem_addr failed\n");
3909 		goto out;
3910 	}
3911 
3912 	if (parse_unum_memory(unum, &board, dname) != 0) {
3913 		MC_LOG("mc_scf_log_event: parse_unum_memory failed\n");
3914 		rv = EINVAL;
3915 		goto out;
3916 	}
3917 
3918 	if (board < 0) {
3919 		MC_LOG("mc_scf_log_event: Invalid board=%d dimm=%s\n",
3920 		    board, dname);
3921 		rv = EINVAL;
3922 		goto out;
3923 	}
3924 
3925 	if (dname_to_bankslot(dname, &bank, &slot) != 0) {
3926 		MC_LOG("mc_scf_log_event: dname_to_bankslot failed\n");
3927 		rv = EINVAL;
3928 		goto out;
3929 	}
3930 
3931 	mutex_enter(&mcmutex);
3932 
3933 	flt_stat.mf_err_add = flt_pag->err_add;
3934 	flt_stat.mf_err_log = flt_pag->err_log;
3935 	flt_stat.mf_flt_paddr = pa;
3936 
3937 	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
3938 		mutex_exit(&mcmutex);
3939 		MC_LOG("mc_scf_log_event: invalid pa\n");
3940 		rv = EINVAL;
3941 		goto out;
3942 	}
3943 
3944 	MC_LOG("mc_scf_log_event: DIMM%s, /LSB%d/B%d/%x, pa %lx elog %x\n",
3945 	    unum, mcp->mc_board_num, bank, flt_pag->err_add, pa,
3946 	    flt_pag->err_log);
3947 
3948 	mutex_enter(&mcp->mc_lock);
3949 
3950 	if (!pa_is_valid(mcp, pa)) {
3951 		mutex_exit(&mcp->mc_lock);
3952 		mutex_exit(&mcmutex);
3953 		rv = EINVAL;
3954 		goto out;
3955 	}
3956 
3957 	rv = 0;
3958 
3959 	mc_queue_scf_log(mcp, &flt_stat, bank);
3960 
3961 	mutex_exit(&mcp->mc_lock);
3962 	mutex_exit(&mcmutex);
3963 
3964 out:
3965 	kmem_free(unum, flt_pag->fmri_sz);
3966 	kmem_free(sid, sid_sz);
3967 
3968 	return (rv);
3969 }
3970 
3971 #ifdef DEBUG
3972 void
3973 mc_dump_dimm(char *buf, int dnamesz, int serialsz, int partnumsz)
3974 {
3975 	char dname[MCOPL_MAX_DIMMNAME + 1];
3976 	char serial[MCOPL_MAX_SERIAL + 1];
3977 	char part[ MCOPL_MAX_PARTNUM + 1];
3978 	char *b;
3979 
3980 	b = buf;
3981 	bcopy(b, dname, dnamesz);
3982 	dname[dnamesz] = 0;
3983 
3984 	b += dnamesz;
3985 	bcopy(b, serial, serialsz);
3986 	serial[serialsz] = 0;
3987 
3988 	b += serialsz;
3989 	bcopy(b, part, partnumsz);
3990 	part[partnumsz] = 0;
3991 
3992 	printf("DIMM=%s  Serial=%s PartNum=%s\n", dname, serial, part);
3993 }
3994 
3995 void
3996 mc_dump_dimm_info(board_dimm_info_t *bd_dimmp)
3997 {
3998 	int	dimm;
3999 	int	dnamesz = bd_dimmp->bd_dnamesz;
4000 	int	sersz = bd_dimmp->bd_serialsz;
4001 	int	partsz = bd_dimmp->bd_partnumsz;
4002 	char	*buf;
4003 
4004 	printf("Version=%d Board=%02d DIMMs=%d NameSize=%d "
4005 	    "SerialSize=%d PartnumSize=%d\n", bd_dimmp->bd_version,
4006 	    bd_dimmp->bd_boardnum, bd_dimmp->bd_numdimms, bd_dimmp->bd_dnamesz,
4007 	    bd_dimmp->bd_serialsz, bd_dimmp->bd_partnumsz);
4008 	printf("======================================================\n");
4009 
4010 	buf = (char *)(bd_dimmp + 1);
4011 	for (dimm = 0; dimm < bd_dimmp->bd_numdimms; dimm++) {
4012 		mc_dump_dimm(buf, dnamesz, sersz, partsz);
4013 		buf += dnamesz + sersz + partsz;
4014 	}
4015 	printf("======================================================\n");
4016 }
4017 
4018 
4019 /* ARGSUSED */
4020 static int
4021 mc_ioctl_debug(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
4022 	int *rvalp)
4023 {
4024 	caddr_t	buf, kbuf;
4025 	uint64_t pa;
4026 	int rv = 0;
4027 	int i;
4028 	uint32_t flags;
4029 	static uint32_t offset = 0;
4030 
4031 
4032 	flags = (cmd >> 4) & 0xfffffff;
4033 
4034 	cmd &= 0xf;
4035 
4036 	MC_LOG("mc_ioctl(cmd = %x, flags = %x)\n", cmd, flags);
4037 
4038 	if (arg != NULL) {
4039 		if (ddi_copyin((const void *)arg, (void *)&pa,
4040 		    sizeof (uint64_t), 0) < 0) {
4041 			rv = EFAULT;
4042 			return (rv);
4043 		}
4044 		buf = NULL;
4045 	} else {
4046 		buf = (caddr_t)kmem_alloc(PAGESIZE, KM_SLEEP);
4047 
4048 		pa = va_to_pa(buf);
4049 		pa += offset;
4050 
4051 		offset += 64;
4052 		if (offset >= PAGESIZE)
4053 			offset = 0;
4054 	}
4055 
4056 	switch (cmd) {
4057 	case MCI_CE:
4058 		(void) mc_inject_error(MC_INJECT_INTERMITTENT_CE, pa, flags);
4059 		break;
4060 	case MCI_PERM_CE:
4061 		(void) mc_inject_error(MC_INJECT_PERMANENT_CE, pa, flags);
4062 		break;
4063 	case MCI_UE:
4064 		(void) mc_inject_error(MC_INJECT_UE, pa, flags);
4065 		break;
4066 	case MCI_M_CE:
4067 		(void) mc_inject_error(MC_INJECT_INTERMITTENT_MCE, pa, flags);
4068 		break;
4069 	case MCI_M_PCE:
4070 		(void) mc_inject_error(MC_INJECT_PERMANENT_MCE, pa, flags);
4071 		break;
4072 	case MCI_M_UE:
4073 		(void) mc_inject_error(MC_INJECT_MUE, pa, flags);
4074 		break;
4075 	case MCI_CMP:
4076 		(void) mc_inject_error(MC_INJECT_CMPE, pa, flags);
4077 		break;
4078 	case MCI_NOP:
4079 		(void) mc_inject_error(MC_INJECT_NOP, pa, flags); break;
4080 	case MCI_SHOW_ALL:
4081 		mc_debug_show_all = 1;
4082 		break;
4083 	case MCI_SHOW_NONE:
4084 		mc_debug_show_all = 0;
4085 		break;
4086 	case MCI_ALLOC:
4087 		/*
4088 		 * just allocate some kernel memory and never free it
4089 		 * 512 MB seems to be the maximum size supported.
4090 		 */
4091 		cmn_err(CE_NOTE, "Allocating kmem %d MB\n", flags * 512);
4092 		for (i = 0; i < flags; i++) {
4093 			kbuf = kmem_alloc(512 * 1024 * 1024, KM_SLEEP);
4094 			cmn_err(CE_NOTE, "kmem buf %llx PA %llx\n",
4095 			    (u_longlong_t)kbuf, (u_longlong_t)va_to_pa(kbuf));
4096 		}
4097 		break;
4098 	case MCI_SUSPEND:
4099 		(void) opl_mc_suspend();
4100 		break;
4101 	case MCI_RESUME:
4102 		(void) opl_mc_resume();
4103 		break;
4104 	default:
4105 		rv = ENXIO;
4106 	}
4107 	if (buf)
4108 		kmem_free(buf, PAGESIZE);
4109 
4110 	return (rv);
4111 }
4112 
4113 #endif /* DEBUG */
4114