xref: /titanic_50/usr/src/uts/sun4u/cpu/opl_olympus.c (revision 535096c2bb10e7c765411fcb939b54c081ba4e07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/opl_olympus_regs.h>
45 #include <sys/opl_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/ontrap.h>
56 #include <sys/panic.h>
57 #include <sys/memlist.h>
58 #include <sys/ndifm.h>
59 #include <sys/ddifm.h>
60 #include <sys/fm/protocol.h>
61 #include <sys/fm/util.h>
62 #include <sys/fm/cpu/SPARC64-VI.h>
63 #include <sys/dtrace.h>
64 #include <sys/watchpoint.h>
65 #include <sys/promif.h>
66 
67 /*
68  * Internal functions.
69  */
70 static int cpu_sync_log_err(void *flt);
71 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
72 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
73 static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
74 
75 /*
76  * Error counters resetting interval.
77  */
78 static int opl_async_check_interval = 60;		/* 1 min */
79 
80 uint_t cpu_impl_dual_pgsz = 1;
81 
82 /*
83  * PA[22:0] represent Displacement in Jupiter
84  * configuration space.
85  */
86 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
87 
88 /*
89  * set in /etc/system to control logging of user BERR/TO's
90  */
91 int cpu_berr_to_verbose = 0;
92 
93 static int min_ecache_size;
94 static uint_t priv_hcl_1;
95 static uint_t priv_hcl_2;
96 static uint_t priv_hcl_4;
97 static uint_t priv_hcl_8;
98 
99 /*
100  * Olympus error log
101  */
102 static opl_errlog_t	*opl_err_log;
103 
104 /*
105  * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
106  * No any other ecc_type_info insertion is allowed in between the following
107  * four UE classess.
108  */
109 ecc_type_to_info_t ecc_type_to_info[] = {
110 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
111 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
112 	FM_EREPORT_CPU_UE_MEM,
113 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
114 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
115 	FM_EREPORT_CPU_UE_CHANNEL,
116 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
117 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
118 	FM_EREPORT_CPU_UE_CPU,
119 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
120 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
121 	FM_EREPORT_CPU_UE_PATH,
122 	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
123 	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
124 	FM_EREPORT_CPU_BERR,
125 	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
126 	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
127 	FM_EREPORT_CPU_BTO,
128 	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
129 	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
130 	FM_EREPORT_CPU_MTLB,
131 	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
132 	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
133 	FM_EREPORT_CPU_TLBP,
134 
135 	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
136 	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
137 	FM_EREPORT_CPU_CRE,
138 	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
139 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
140 	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
141 	FM_EREPORT_CPU_TSBCTX,
142 	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
143 	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
144 	FM_EREPORT_CPU_TSBP,
145 	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
146 	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
147 	FM_EREPORT_CPU_PSTATE,
148 	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
149 	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
150 	FM_EREPORT_CPU_TSTATE,
151 	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
152 	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
153 	FM_EREPORT_CPU_IUG_F,
154 	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
155 	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
156 	FM_EREPORT_CPU_IUG_R,
157 	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
158 	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
159 	FM_EREPORT_CPU_SDC,
160 	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
161 	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
162 	FM_EREPORT_CPU_WDT,
163 	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
164 	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
165 	FM_EREPORT_CPU_DTLB,
166 	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
167 	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
168 	FM_EREPORT_CPU_ITLB,
169 	UGESR_IUG_COREERR, "IUG_COREERR",
170 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
171 	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
172 	FM_EREPORT_CPU_CORE,
173 	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
174 	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
175 	FM_EREPORT_CPU_DAE,
176 	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
177 	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
178 	FM_EREPORT_CPU_IAE,
179 	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
180 	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
181 	FM_EREPORT_CPU_UGE,
182 	0,		NULL,		0,		0,
183 	NULL,  0,	   0,
184 };
185 
/*
 * Function-pointer hook for a memory-information lookup routine.
 * It is defined here without an initializer, so it starts out NULL.
 */
int (*p2get_mem_info)(int synd_code, uint64_t paddr, uint64_t *mem_sizep,
    uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp,
    int *mcidp);
189 
190 
191 /*
192  * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
193  */
194 void
195 cpu_init_trap(void)
196 {
197 	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
198 	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
199 	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
200 	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
201 	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
202 	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
203 }
204 
205 static int
206 getintprop(pnode_t node, char *name, int deflt)
207 {
208 	int	value;
209 
210 	switch (prom_getproplen(node, name)) {
211 	case sizeof (int):
212 		(void) prom_getprop(node, name, (caddr_t)&value);
213 		break;
214 
215 	default:
216 		value = deflt;
217 		break;
218 	}
219 
220 	return (value);
221 }
222 
223 /*
224  * Set the magic constants of the implementation.
225  */
226 /*ARGSUSED*/
227 void
228 cpu_fiximp(pnode_t dnode)
229 {
230 	int i, a;
231 	extern int vac_size, vac_shift;
232 	extern uint_t vac_mask;
233 
234 	static struct {
235 		char	*name;
236 		int	*var;
237 		int	defval;
238 	} prop[] = {
239 		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
240 		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
241 		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
242 		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
243 		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
244 		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
245 		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
246 	};
247 
248 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
249 		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
250 
251 	ecache_setsize = ecache_size / ecache_associativity;
252 
253 	vac_size = OPL_VAC_SIZE;
254 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
255 	i = 0; a = vac_size;
256 	while (a >>= 1)
257 		++i;
258 	vac_shift = i;
259 	shm_alignment = vac_size;
260 	vac = 1;
261 }
262 
263 #ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
264 /*
265  * Quick and dirty way to redefine locally in
266  * OPL the value of IDSR_BN_SETS to 31 instead
267  * of the standard 32 value. This is to workaround
268  * REV_B of Olympus_c processor's problem in handling
269  * more than 31 xcall broadcast.
270  */
271 #undef	IDSR_BN_SETS
272 #define	IDSR_BN_SETS    31
273 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
274 
275 void
276 send_mondo_set(cpuset_t set)
277 {
278 	int lo, busy, nack, shipped = 0;
279 	uint16_t i, cpuids[IDSR_BN_SETS];
280 	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
281 	uint64_t starttick, endtick, tick, lasttick;
282 #if (NCPU > IDSR_BN_SETS)
283 	int index = 0;
284 	int ncpuids = 0;
285 #endif
286 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
287 	int bn_sets = IDSR_BN_SETS;
288 	uint64_t ver;
289 
290 	ASSERT(NCPU > bn_sets);
291 #endif
292 
293 	ASSERT(!CPUSET_ISNULL(set));
294 	starttick = lasttick = gettick();
295 
296 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
297 	ver = ultra_getver();
298 	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
299 		((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
300 		bn_sets = 1;
301 #endif
302 
303 #if (NCPU <= IDSR_BN_SETS)
304 	for (i = 0; i < NCPU; i++)
305 		if (CPU_IN_SET(set, i)) {
306 			shipit(i, shipped);
307 			nackmask |= IDSR_NACK_BIT(shipped);
308 			cpuids[shipped++] = i;
309 			CPUSET_DEL(set, i);
310 			if (CPUSET_ISNULL(set))
311 				break;
312 		}
313 	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
314 #else
315 	for (i = 0; i < NCPU; i++)
316 		if (CPU_IN_SET(set, i)) {
317 			ncpuids++;
318 
319 			/*
320 			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
321 			 * find we have shipped to more than (IDSR_BN_SETS)
322 			 * CPUs, set "index" to the highest numbered CPU in
323 			 * the set so we can ship to other CPUs a bit later on.
324 			 */
325 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
326 			if (shipped < bn_sets) {
327 #else
328 			if (shipped < IDSR_BN_SETS) {
329 #endif
330 				shipit(i, shipped);
331 				nackmask |= IDSR_NACK_BIT(shipped);
332 				cpuids[shipped++] = i;
333 				CPUSET_DEL(set, i);
334 				if (CPUSET_ISNULL(set))
335 					break;
336 			} else
337 				index = (int)i;
338 		}
339 
340 	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
341 #endif
342 
343 	busymask = IDSR_NACK_TO_BUSY(nackmask);
344 	busy = nack = 0;
345 	endtick = starttick + xc_tick_limit;
346 	for (;;) {
347 		idsr = getidsr();
348 #if (NCPU <= IDSR_BN_SETS)
349 		if (idsr == 0)
350 			break;
351 #else
352 		if (idsr == 0 && shipped == ncpuids)
353 			break;
354 #endif
355 		tick = gettick();
356 		/*
357 		 * If there is a big jump between the current tick
358 		 * count and lasttick, we have probably hit a break
359 		 * point.  Adjust endtick accordingly to avoid panic.
360 		 */
361 		if (tick > (lasttick + xc_tick_jump_limit))
362 			endtick += (tick - lasttick);
363 		lasttick = tick;
364 		if (tick > endtick) {
365 			if (panic_quiesce)
366 				return;
367 			cmn_err(CE_CONT, "send mondo timeout "
368 				"[%d NACK %d BUSY]\nIDSR 0x%"
369 				"" PRIx64 "  cpuids:", nack, busy, idsr);
370 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
371 			for (i = 0; i < bn_sets; i++) {
372 #else
373 			for (i = 0; i < IDSR_BN_SETS; i++) {
374 #endif
375 				if (idsr & (IDSR_NACK_BIT(i) |
376 				    IDSR_BUSY_BIT(i))) {
377 					cmn_err(CE_CONT, " 0x%x",
378 						cpuids[i]);
379 				}
380 			}
381 			cmn_err(CE_CONT, "\n");
382 			cmn_err(CE_PANIC, "send_mondo_set: timeout");
383 		}
384 		curnack = idsr & nackmask;
385 		curbusy = idsr & busymask;
386 
387 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
388 		/*
389 		 * Only proceed to send more xcalls if all the
390 		 * cpus in the previous IDSR_BN_SETS were completed.
391 		 */
392 		if (curbusy) {
393 			busy++;
394 			continue;
395 		}
396 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
397 
398 #if (NCPU > IDSR_BN_SETS)
399 		if (shipped < ncpuids) {
400 			uint64_t cpus_left;
401 			uint16_t next = (uint16_t)index;
402 
403 			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
404 			    busymask;
405 
406 			if (cpus_left) {
407 				do {
408 					/*
409 					 * Sequence through and ship to the
410 					 * remainder of the CPUs in the system
411 					 * (e.g. other than the first
412 					 * (IDSR_BN_SETS)) in reverse order.
413 					 */
414 					lo = lowbit(cpus_left) - 1;
415 					i = IDSR_BUSY_IDX(lo);
416 					shipit(next, i);
417 					shipped++;
418 					cpuids[i] = next;
419 
420 					/*
421 					 * If we've processed all the CPUs,
422 					 * exit the loop now and save
423 					 * instructions.
424 					 */
425 					if (shipped == ncpuids)
426 						break;
427 
428 					for ((index = ((int)next - 1));
429 						index >= 0; index--)
430 						if (CPU_IN_SET(set, index)) {
431 							next = (uint16_t)index;
432 							break;
433 						}
434 
435 					cpus_left &= ~(1ull << lo);
436 				} while (cpus_left);
437 				continue;
438 			}
439 		}
440 #endif
441 #ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
442 		if (curbusy) {
443 			busy++;
444 			continue;
445 		}
446 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
447 #ifdef SEND_MONDO_STATS
448 		{
449 			int n = gettick() - starttick;
450 			if (n < 8192)
451 				x_nack_stimes[n >> 7]++;
452 		}
453 #endif
454 		while (gettick() < (tick + sys_clock_mhz))
455 			;
456 		do {
457 			lo = lowbit(curnack) - 1;
458 			i = IDSR_NACK_IDX(lo);
459 			shipit(cpuids[i], i);
460 			curnack &= ~(1ull << lo);
461 		} while (curnack);
462 		nack++;
463 		busy = 0;
464 	}
465 #ifdef SEND_MONDO_STATS
466 	{
467 		int n = gettick() - starttick;
468 		if (n < 8192)
469 			x_set_stimes[n >> 7]++;
470 		else
471 			x_set_ltimes[(n >> 13) & 0xf]++;
472 	}
473 	x_set_cpus[shipped]++;
474 #endif
475 }
476 
477 /*
478  * Cpu private initialization.
479  */
480 void
481 cpu_init_private(struct cpu *cp)
482 {
483 	if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) {
484 		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported",
485 			cp->cpu_id, cpunodes[cp->cpu_id].implementation);
486 	}
487 
488 	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
489 }
490 
491 void
492 cpu_setup(void)
493 {
494 	extern int at_flags;
495 	extern int disable_delay_tlb_flush, delay_tlb_flush;
496 	extern int cpc_has_overflow_intr;
497 	extern int disable_text_largepages;
498 	extern int use_text_pgsz4m;
499 	uint64_t cpu0_log;
500 	extern	 uint64_t opl_cpu0_err_log;
501 
502 	/*
503 	 * Initialize Error log Scratch register for error handling.
504 	 */
505 
506 	cpu0_log = va_to_pa(&opl_cpu0_err_log);
507 	opl_error_setup(cpu0_log);
508 
509 	/*
510 	 * Enable MMU translating multiple page sizes for
511 	 * sITLB and sDTLB.
512 	 */
513 	opl_mpg_enable();
514 
515 	/*
516 	 * Setup chip-specific trap handlers.
517 	 */
518 	cpu_init_trap();
519 
520 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
521 
522 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
523 
524 	/*
525 	 * Due to the number of entries in the fully-associative tlb
526 	 * this may have to be tuned lower than in spitfire.
527 	 */
528 	pp_slots = MIN(8, MAXPP_SLOTS);
529 
530 	/*
531 	 * Block stores do not invalidate all pages of the d$, pagecopy
532 	 * et. al. need virtual translations with virtual coloring taken
533 	 * into consideration.  prefetch/ldd will pollute the d$ on the
534 	 * load side.
535 	 */
536 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
537 
538 	if (use_page_coloring) {
539 		do_pg_coloring = 1;
540 		if (use_virtual_coloring)
541 			do_virtual_coloring = 1;
542 	}
543 
544 	isa_list =
545 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
546 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
547 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
548 
549 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
550 
551 	/*
552 	 * On SPARC64-VI, there's no hole in the virtual address space
553 	 */
554 	hole_start = hole_end = 0;
555 
556 	/*
557 	 * The kpm mapping window.
558 	 * kpm_size:
559 	 *	The size of a single kpm range.
560 	 *	The overall size will be: kpm_size * vac_colors.
561 	 * kpm_vbase:
562 	 *	The virtual start address of the kpm range within the kernel
563 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
564 	 */
565 	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
566 	kpm_size_shift = 47;
567 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
568 	kpm_smallpages = 1;
569 
570 	/*
571 	 * The traptrace code uses either %tick or %stick for
572 	 * timestamping.  We have %stick so we can use it.
573 	 */
574 	traptrace_use_stick = 1;
575 
576 	/*
577 	 * SPARC64-VI has a performance counter overflow interrupt
578 	 */
579 	cpc_has_overflow_intr = 1;
580 
581 	/*
582 	 * Use SPARC64-VI flush-all support
583 	 */
584 	if (!disable_delay_tlb_flush)
585 		delay_tlb_flush = 1;
586 
587 	/*
588 	 * Declare that this architecture/cpu combination does not support
589 	 * fpRAS.
590 	 */
591 	fpras_implemented = 0;
592 
593 	/*
594 	 * Enable 4M pages to be used for mapping user text by default.  Don't
595 	 * use large pages for initialized data segments since we may not know
596 	 * at exec() time what should be the preferred large page size for DTLB
597 	 * programming.
598 	 */
599 	use_text_pgsz4m = 1;
600 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
601 	    (1 << TTE32M) | (1 << TTE256M);
602 }
603 
604 /*
605  * Called by setcpudelay
606  */
607 void
608 cpu_init_tick_freq(void)
609 {
610 	/*
611 	 * For SPARC64-VI we want to use the system clock rate as
612 	 * the basis for low level timing, due to support of mixed
613 	 * speed CPUs and power managment.
614 	 */
615 	if (system_clock_freq == 0)
616 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
617 
618 	sys_tick_freq = system_clock_freq;
619 }
620 
#ifdef SEND_MONDO_STATS
/*
 * Histograms filled in by send_one_mondo() and send_mondo_set().
 * The *_stimes arrays are indexed by (elapsed ticks >> 7) for elapsed
 * times below 8192 ticks; the *_ltimes arrays are indexed by
 * ((elapsed ticks >> 13) & 0xf) for everything longer.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];

uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];	/* indexed by the number of CPUs shipped to */

uint32_t x_nack_stimes[64];
#endif
629 
630 /*
631  * Note: A version of this function is used by the debugger via the KDI,
632  * and must be kept in sync with this version.  Any changes made to this
633  * function to support new chips or to accomodate errata must also be included
634  * in the KDI-specific version.  See us3_kdi.c.
635  */
636 void
637 send_one_mondo(int cpuid)
638 {
639 	int busy, nack;
640 	uint64_t idsr, starttick, endtick, tick, lasttick;
641 	uint64_t busymask;
642 
643 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
644 	starttick = lasttick = gettick();
645 	shipit(cpuid, 0);
646 	endtick = starttick + xc_tick_limit;
647 	busy = nack = 0;
648 	busymask = IDSR_BUSY;
649 	for (;;) {
650 		idsr = getidsr();
651 		if (idsr == 0)
652 			break;
653 
654 		tick = gettick();
655 		/*
656 		 * If there is a big jump between the current tick
657 		 * count and lasttick, we have probably hit a break
658 		 * point.  Adjust endtick accordingly to avoid panic.
659 		 */
660 		if (tick > (lasttick + xc_tick_jump_limit))
661 			endtick += (tick - lasttick);
662 		lasttick = tick;
663 		if (tick > endtick) {
664 			if (panic_quiesce)
665 				return;
666 			cmn_err(CE_PANIC, "send mondo timeout "
667 				"(target 0x%x) [%d NACK %d BUSY]",
668 					cpuid, nack, busy);
669 		}
670 
671 		if (idsr & busymask) {
672 			busy++;
673 			continue;
674 		}
675 		drv_usecwait(1);
676 		shipit(cpuid, 0);
677 		nack++;
678 		busy = 0;
679 	}
680 #ifdef SEND_MONDO_STATS
681 	{
682 		int n = gettick() - starttick;
683 		if (n < 8192)
684 			x_one_stimes[n >> 7]++;
685 		else
686 			x_one_ltimes[(n >> 13) & 0xf]++;
687 	}
688 #endif
689 }
690 
691 /*
692  * init_mmu_page_sizes is set to one after the bootup time initialization
693  * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
694  * valid value.
695  *
696  * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
697  * versions of disable_ism_large_pages and disable_large_pages, and feed back
698  * into those two hat variables at hat initialization time.
699  *
700  */
701 int init_mmu_page_sizes = 0;
702 static int mmu_disable_ism_large_pages = ((1 << TTE64K) |
703 	(1 << TTE512K) | (1 << TTE256M));
704 static int mmu_disable_auto_large_pages = ((1 << TTE64K) |
705 	(1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
706 static int mmu_disable_large_pages = 0;
707 
708 /*
709  * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
710  * Called during very early bootup from check_cpus_set().
711  * Can be called to verify that mmu_page_sizes are set up correctly.
712  *
713  * Set Olympus defaults. We do not use the function parameter.
714  */
715 /*ARGSUSED*/
716 int
717 mmu_init_mmu_page_sizes(int32_t not_used)
718 {
719 	if (!init_mmu_page_sizes) {
720 		mmu_page_sizes = MMU_PAGE_SIZES;
721 		mmu_hashcnt = MAX_HASHCNT;
722 		mmu_ism_pagesize = MMU_PAGESIZE32M;
723 		auto_lpg_maxszc = TTE32M;
724 		mmu_exported_pagesize_mask = (1 << TTE8K) |
725 		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
726 		    (1 << TTE32M) | (1 << TTE256M);
727 		init_mmu_page_sizes = 1;
728 		return (0);
729 	}
730 	return (1);
731 }
732 
733 /* SPARC64-VI worst case DTLB parameters */
734 #ifndef	LOCKED_DTLB_ENTRIES
735 #define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
736 #endif
737 #define	TOTAL_DTLB_ENTRIES	32
738 #define	AVAIL_32M_ENTRIES	0
739 #define	AVAIL_256M_ENTRIES	0
740 #define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
741 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
742 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
743 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
744 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
745 
746 /*
747  * The function returns the mmu-specific values for the
748  * hat's disable_large_pages, disable_ism_large_pages, and
749  * disable_auto_large_pages variables.
750  */
751 int
752 mmu_large_pages_disabled(uint_t flag)
753 {
754 	int pages_disable = 0;
755 
756 	if (flag == HAT_LOAD) {
757 		pages_disable =  mmu_disable_large_pages;
758 	} else if (flag == HAT_LOAD_SHARE) {
759 		pages_disable = mmu_disable_ism_large_pages;
760 	} else if (flag == HAT_LOAD_AUTOLPG) {
761 		pages_disable = mmu_disable_auto_large_pages;
762 	}
763 	return (pages_disable);
764 }
765 
766 /*
767  * mmu_init_large_pages is called with the desired ism_pagesize parameter.
768  * It may be called from set_platform_defaults, if some value other than 32M
769  * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
770  * then only warn, since it would be bad form to panic due to a user typo.
771  *
772  * The function re-initializes the mmu_disable_ism_large_pages variable.
773  */
774 void
775 mmu_init_large_pages(size_t ism_pagesize)
776 {
777 	switch (ism_pagesize) {
778 	case MMU_PAGESIZE4M:
779 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
780 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
781 		mmu_disable_auto_large_pages = ((1 << TTE64K) |
782 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
783 		auto_lpg_maxszc = TTE4M;
784 		break;
785 	case MMU_PAGESIZE32M:
786 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
787 		    (1 << TTE512K) | (1 << TTE256M));
788 		mmu_disable_auto_large_pages = ((1 << TTE64K) |
789 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
790 		auto_lpg_maxszc = TTE32M;
791 		break;
792 	case MMU_PAGESIZE256M:
793 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
794 		    (1 << TTE512K) | (1 << TTE32M));
795 		mmu_disable_auto_large_pages = ((1 << TTE64K) |
796 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
797 		auto_lpg_maxszc = TTE256M;
798 		break;
799 	default:
800 		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
801 		    ism_pagesize);
802 		break;
803 	}
804 }
805 
806 /*ARGSUSED*/
807 uint_t
808 mmu_preferred_pgsz(struct hat *hat, caddr_t addr, size_t len)
809 {
810 	sfmmu_t *sfmmup = (sfmmu_t *)hat;
811 	uint_t pgsz0, pgsz1;
812 	uint_t szc, maxszc = mmu_page_sizes - 1;
813 	size_t pgsz;
814 	extern int disable_auto_large_pages;
815 
816 	pgsz0 = (uint_t)sfmmup->sfmmu_pgsz[0];
817 	pgsz1 = (uint_t)sfmmup->sfmmu_pgsz[1];
818 
819 	/*
820 	 * If either of the TLBs are reprogrammed, choose
821 	 * the largest mapping size as the preferred size,
822 	 * if it fits the size and alignment constraints.
823 	 * Else return the largest mapping size that fits,
824 	 * if neither TLB is reprogrammed.
825 	 */
826 	if (pgsz0 > TTE8K || pgsz1 > TTE8K) {
827 		if (pgsz1 > pgsz0) {	/* First try pgsz1 */
828 			pgsz = hw_page_array[pgsz1].hp_size;
829 			if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
830 				return (pgsz1);
831 		}
832 		if (pgsz0 > TTE8K) {	/* Then try pgsz0, if !TTE8K */
833 			pgsz = hw_page_array[pgsz0].hp_size;
834 			if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
835 				return (pgsz0);
836 		}
837 	} else { /* Otherwise pick best fit if neither TLB is reprogrammed. */
838 		for (szc = maxszc; szc > TTE8K; szc--) {
839 			if (disable_auto_large_pages & (1 << szc))
840 				continue;
841 
842 			pgsz = hw_page_array[szc].hp_size;
843 			if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
844 				return (szc);
845 		}
846 	}
847 	return (TTE8K);
848 }
849 
850 /*
851  * Function to reprogram the TLBs when page sizes used
852  * by a process change significantly.
853  */
854 void
855 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
856 {
857 	uint8_t pgsz0, pgsz1;
858 
859 	/*
860 	 * Don't program 2nd dtlb for kernel and ism hat
861 	 */
862 	ASSERT(hat->sfmmu_ismhat == NULL);
863 	ASSERT(hat != ksfmmup);
864 
865 	/*
866 	 * hat->sfmmu_pgsz[] is an array whose elements
867 	 * contain a sorted order of page sizes.  Element
868 	 * 0 is the most commonly used page size, followed
869 	 * by element 1, and so on.
870 	 *
871 	 * ttecnt[] is an array of per-page-size page counts
872 	 * mapped into the process.
873 	 *
874 	 * If the HAT's choice for page sizes is unsuitable,
875 	 * we can override it here.  The new values written
876 	 * to the array will be handed back to us later to
877 	 * do the actual programming of the TLB hardware.
878 	 *
879 	 */
880 	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
881 	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);
882 
883 	/*
884 	 * This implements PAGESIZE programming of the sTLB
885 	 * if large TTE counts don't exceed the thresholds.
886 	 */
887 	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
888 		pgsz0 = page_szc(MMU_PAGESIZE);
889 	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
890 		pgsz1 = page_szc(MMU_PAGESIZE);
891 	tmp_pgsz[0] = pgsz0;
892 	tmp_pgsz[1] = pgsz1;
893 	/* otherwise, accept what the HAT chose for us */
894 }
895 
896 /*
897  * The HAT calls this function when an MMU context is allocated so that we
898  * can reprogram the large TLBs appropriately for the new process using
899  * the context.
900  *
901  * The caller must hold the HAT lock.
902  */
903 void
904 mmu_set_ctx_page_sizes(struct hat *hat)
905 {
906 	uint8_t pgsz0, pgsz1;
907 	uint8_t new_cext;
908 
909 	ASSERT(sfmmu_hat_lock_held(hat));
910 	/*
911 	 * Don't program 2nd dtlb for kernel and ism hat
912 	 */
913 	if (hat->sfmmu_ismhat || hat == ksfmmup)
914 		return;
915 
916 	/*
917 	 * If supported, reprogram the TLBs to a larger pagesize.
918 	 */
919 	pgsz0 = hat->sfmmu_pgsz[0];
920 	pgsz1 = hat->sfmmu_pgsz[1];
921 	ASSERT(pgsz0 < mmu_page_sizes);
922 	ASSERT(pgsz1 < mmu_page_sizes);
923 	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
924 	if (hat->sfmmu_cext != new_cext) {
925 #ifdef DEBUG
926 		int i;
927 		/*
928 		 * assert cnum should be invalid, this is because pagesize
929 		 * can only be changed after a proc's ctxs are invalidated.
930 		 */
931 		for (i = 0; i < max_mmu_ctxdoms; i++) {
932 			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
933 		}
934 #endif /* DEBUG */
935 		hat->sfmmu_cext = new_cext;
936 	}
937 	/*
938 	 * sfmmu_setctx_sec() will take care of the
939 	 * rest of the dirty work for us.
940 	 */
941 }
942 
943 /*
944  * This function assumes that there are either four or six supported page
945  * sizes and at most two programmable TLBs, so we need to decide which
946  * page sizes are most important and then adjust the TLB page sizes
947  * accordingly (if supported).
948  *
949  * If these assumptions change, this function will need to be
950  * updated to support whatever the new limits are.
951  */
952 void
953 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
954 {
955 	uint64_t sortcnt[MMU_PAGE_SIZES];
956 	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
957 	uint8_t i, j, max;
958 	uint16_t oldval, newval;
959 
960 	/*
961 	 * We only consider reprogramming the TLBs if one or more of
962 	 * the two most used page sizes changes and we're using
963 	 * large pages in this process.
964 	 */
965 	if (sfmmup->sfmmu_flags & HAT_LGPG_FLAGS) {
966 		/* Sort page sizes. */
967 		for (i = 0; i < mmu_page_sizes; i++) {
968 			sortcnt[i] = ttecnt[i];
969 		}
970 		for (j = 0; j < mmu_page_sizes; j++) {
971 			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
972 				if (sortcnt[i] > sortcnt[max])
973 					max = i;
974 			}
975 			tmp_pgsz[j] = max;
976 			sortcnt[max] = 0;
977 		}
978 
979 		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];
980 
981 		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);
982 
983 		/* Check 2 largest values after the sort. */
984 		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
985 		if (newval != oldval) {
986 			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
987 		}
988 	}
989 }
990 
991 /*
992  * Return processor specific async error structure
993  * size used.
994  */
995 int
996 cpu_aflt_size(void)
997 {
998 	return (sizeof (opl_async_flt_t));
999 }
1000 
1001 /*
1002  * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
1003  * post-process CPU events that are dequeued.  As such, it can be invoked
1004  * from softint context, from AST processing in the trap() flow, or from the
1005  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
1006  * Historically this entry point was used to log the actual cmn_err(9F) text;
1007  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
1008  * With FMA this function now also returns a flag which indicates to the
1009  * caller whether the ereport should be posted (1) or suppressed (0).
1010  */
1011 /*ARGSUSED*/
1012 static int
1013 cpu_sync_log_err(void *flt)
1014 {
1015 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
1016 	struct async_flt *aflt = (struct async_flt *)flt;
1017 
1018 	/*
1019 	 * No extra processing of urgent error events.
1020 	 * Always generate ereports for these events.
1021 	 */
1022 	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
1023 		return (1);
1024 
1025 	/*
1026 	 * Additional processing for synchronous errors.
1027 	 */
1028 	switch (opl_flt->flt_type) {
1029 	case OPL_CPU_INV_SFSR:
1030 		return (1);
1031 
1032 	case OPL_CPU_SYNC_UE:
1033 		/*
1034 		 * The validity: SFSR_MK_UE bit has been checked
1035 		 * in opl_cpu_sync_error()
1036 		 * No more check is required.
1037 		 *
1038 		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
1039 		 * and they have been retrieved in cpu_queue_events()
1040 		 */
1041 
1042 		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
1043 			ASSERT(aflt->flt_in_memory);
1044 			/*
1045 			 * We want to skip logging only if ALL the following
1046 			 * conditions are true:
1047 			 *
1048 			 *	1. We are not panicing already.
1049 			 *	2. The error is a memory error.
1050 			 *	3. There is only one error.
1051 			 *	4. The error is on a retired page.
1052 			 *	5. The error occurred under on_trap
1053 			 *	protection AFLT_PROT_EC
1054 			 */
1055 			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
1056 			    page_retire_check(aflt->flt_addr, NULL) == 0) {
1057 				/*
1058 				 * Do not log an error from
1059 				 * the retired page
1060 				 */
1061 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
1062 				return (0);
1063 			}
1064 			if (!panicstr)
1065 				cpu_page_retire(opl_flt);
1066 		}
1067 		return (1);
1068 
1069 	case OPL_CPU_SYNC_OTHERS:
1070 		/*
1071 		 * For the following error cases, the processor HW does
1072 		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
1073 		 * to assign appropriate values here to reflect what we
1074 		 * think is the most likely cause of the problem w.r.t to
1075 		 * the particular error event.  For Buserr and timeout
1076 		 * error event, we will assign OPL_ERRID_CHANNEL as the
1077 		 * most likely reason.  For TLB parity or multiple hit
1078 		 * error events, we will assign the reason as
1079 		 * OPL_ERRID_CPU (cpu related problem) and set the
1080 		 * flt_eid_sid to point to the cpuid.
1081 		 */
1082 
1083 		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
1084 			/*
1085 			 * flt_eid_sid will not be used for this case.
1086 			 */
1087 			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
1088 		}
1089 		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
1090 			    opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1091 			    opl_flt->flt_eid_sid = aflt->flt_inst;
1092 		}
1093 
1094 		/*
1095 		 * In case of no effective error bit
1096 		 */
1097 		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
1098 			    opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1099 			    opl_flt->flt_eid_sid = aflt->flt_inst;
1100 		}
1101 		break;
1102 
1103 		default:
1104 			return (1);
1105 	}
1106 	return (1);
1107 }
1108 
1109 /*
1110  * Retire the bad page that may contain the flushed error.
1111  */
1112 void
1113 cpu_page_retire(opl_async_flt_t *opl_flt)
1114 {
1115 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1116 	(void) page_retire(aflt->flt_addr, PR_UE);
1117 }
1118 
1119 /*
1120  * Invoked by error_init() early in startup and therefore before
1121  * startup_errorq() is called to drain any error Q -
1122  *
1123  * startup()
1124  *   startup_end()
1125  *     error_init()
1126  *       cpu_error_init()
1127  * errorq_init()
1128  *   errorq_drain()
1129  * start_other_cpus()
1130  *
1131  * The purpose of this routine is to create error-related taskqs.  Taskqs
1132  * are used for this purpose because cpu_lock can't be grabbed from interrupt
1133  * context.
1134  *
1135  */
1136 /*ARGSUSED*/
1137 void
1138 cpu_error_init(int items)
1139 {
1140 	opl_err_log = (opl_errlog_t *)
1141 	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
1142 	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
1143 		cmn_err(CE_PANIC, "The base address of the error log "
1144 		    "is not page aligned");
1145 }
1146 
1147 /*
1148  * We route all errors through a single switch statement.
1149  */
1150 void
1151 cpu_ue_log_err(struct async_flt *aflt)
1152 {
1153 	switch (aflt->flt_class) {
1154 	case CPU_FAULT:
1155 		if (cpu_sync_log_err(aflt))
1156 			cpu_ereport_post(aflt);
1157 		break;
1158 
1159 	case BUS_FAULT:
1160 		bus_async_log_err(aflt);
1161 		break;
1162 
1163 	default:
1164 		cmn_err(CE_WARN, "discarding async error %p with invalid "
1165 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1166 		return;
1167 	}
1168 }
1169 
/*
 * Panic hook invoked from panic_idle().  This CPU module has no work to
 * do at that point, so the body is intentionally empty.
 */
void
cpu_async_panic_callb(void)
{
}
1179 
1180 /*
1181  * Routine to return a string identifying the physical name
1182  * associated with a memory/cache error.
1183  */
1184 /*ARGSUSED*/
1185 int
1186 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
1187     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
1188     ushort_t flt_status, char *buf, int buflen, int *lenp)
1189 {
1190 	int synd_code;
1191 	int ret;
1192 
1193 	/*
1194 	 * An AFSR of -1 defaults to a memory syndrome.
1195 	 */
1196 	synd_code = (int)flt_synd;
1197 
1198 	if (&plat_get_mem_unum) {
1199 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
1200 			flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
1201 			buf[0] = '\0';
1202 			*lenp = 0;
1203 		}
1204 		return (ret);
1205 	}
1206 	buf[0] = '\0';
1207 	*lenp = 0;
1208 	return (ENOTSUP);
1209 }
1210 
1211 /*
1212  * Wrapper for cpu_get_mem_unum() routine that takes an
1213  * async_flt struct rather than explicit arguments.
1214  */
1215 int
1216 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1217     char *buf, int buflen, int *lenp)
1218 {
1219 	/*
1220 	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
1221 	 * memory error.
1222 	 */
1223 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
1224 	    (uint64_t)-1,
1225 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
1226 	    aflt->flt_status, buf, buflen, lenp));
1227 }
1228 
1229 /*
1230  * This routine is a more generic interface to cpu_get_mem_unum()
1231  * that may be used by other modules (e.g. mm).
1232  */
1233 /*ARGSUSED*/
1234 int
1235 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1236     char *buf, int buflen, int *lenp)
1237 {
1238 	int synd_status, flt_in_memory, ret;
1239 	ushort_t flt_status = 0;
1240 	char unum[UNUM_NAMLEN];
1241 
1242 	/*
1243 	 * Check for an invalid address.
1244 	 */
1245 	if (afar == (uint64_t)-1)
1246 		return (ENXIO);
1247 
1248 	if (synd == (uint64_t)-1)
1249 		synd_status = AFLT_STAT_INVALID;
1250 	else
1251 		synd_status = AFLT_STAT_VALID;
1252 
1253 	flt_in_memory = (*afsr & SFSR_MEMORY) &&
1254 		pf_is_memory(afar >> MMU_PAGESHIFT);
1255 
1256 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1257 		CPU->cpu_id, flt_in_memory, flt_status, unum,
1258 		UNUM_NAMLEN, lenp);
1259 	if (ret != 0)
1260 		return (ret);
1261 
1262 	if (*lenp >= buflen)
1263 		return (ENAMETOOLONG);
1264 
1265 	(void) strncpy(buf, unum, buflen);
1266 
1267 	return (0);
1268 }
1269 
1270 /*
1271  * Routine to return memory information associated
1272  * with a physical address and syndrome.
1273  */
1274 /*ARGSUSED*/
1275 int
1276 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1277     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1278     int *segsp, int *banksp, int *mcidp)
1279 {
1280 	int synd_code = (int)synd;
1281 
1282 	if (afar == (uint64_t)-1)
1283 		return (ENXIO);
1284 
1285 	if (p2get_mem_info != NULL)
1286 		return ((p2get_mem_info)(synd_code, afar,
1287 			mem_sizep, seg_sizep, bank_sizep,
1288 			segsp, banksp, mcidp));
1289 	else
1290 		return (ENOTSUP);
1291 }
1292 
1293 /*
1294  * Routine to return a string identifying the physical
1295  * name associated with a cpuid.
1296  */
1297 int
1298 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1299 {
1300 	int ret;
1301 	char unum[UNUM_NAMLEN];
1302 
1303 	if (&plat_get_cpu_unum) {
1304 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
1305 			!= 0)
1306 			return (ret);
1307 	} else {
1308 		return (ENOTSUP);
1309 	}
1310 
1311 	if (*lenp >= buflen)
1312 		return (ENAMETOOLONG);
1313 
1314 	(void) strncpy(buf, unum, *lenp);
1315 
1316 	return (0);
1317 }
1318 
1319 /*
1320  * This routine exports the name buffer size.
1321  */
1322 size_t
1323 cpu_get_name_bufsize()
1324 {
1325 	return (UNUM_NAMLEN);
1326 }
1327 
1328 /*
1329  * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
1330  */
1331 void
1332 cpu_flush_ecache(void)
1333 {
1334 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
1335 	    cpunodes[CPU->cpu_id].ecache_linesize);
1336 }
1337 
1338 static uint8_t
1339 flt_to_trap_type(struct async_flt *aflt)
1340 {
1341 	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
1342 		return (TRAP_TYPE_ECC_I);
1343 	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
1344 		return (TRAP_TYPE_ECC_D);
1345 	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
1346 		return (TRAP_TYPE_URGENT);
1347 	return (-1);
1348 }
1349 
1350 /*
1351  * Encode the data saved in the opl_async_flt_t struct into
1352  * the FM ereport payload.
1353  */
1354 /* ARGSUSED */
1355 static void
1356 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
1357 		nvlist_t *resource)
1358 {
1359 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
1360 	char unum[UNUM_NAMLEN];
1361 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1362 	int len;
1363 
1364 
1365 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
1366 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
1367 			DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1368 	}
1369 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
1370 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
1371 			DATA_TYPE_UINT64, aflt->flt_addr, NULL);
1372 	}
1373 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
1374 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
1375 			DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1376 	}
1377 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
1378 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
1379 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
1380 	}
1381 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
1382 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
1383 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
1384 	}
1385 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
1386 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
1387 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
1388 	}
1389 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
1390 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
1391 		    DATA_TYPE_BOOLEAN_VALUE,
1392 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
1393 	}
1394 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
1395 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
1396 			DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
1397 	}
1398 
1399 	switch (opl_flt->flt_eid_mod) {
1400 	case OPL_ERRID_CPU:
1401 		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1402 			(u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
1403 		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
1404 			NULL, opl_flt->flt_eid_sid,
1405 			(uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version,
1406 			sbuf);
1407 		fm_payload_set(payload,
1408 			FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1409 			DATA_TYPE_NVLIST, resource, NULL);
1410 		break;
1411 
1412 	case OPL_ERRID_CHANNEL:
1413 		/*
1414 		 * No resource is created but the cpumem DE will find
1415 		 * the defective path by retreiving EID from SFSR which is
1416 		 * included in the payload.
1417 		 */
1418 		break;
1419 
1420 	case OPL_ERRID_MEM:
1421 		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
1422 		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
1423 			NULL, unum, NULL, (uint64_t)-1);
1424 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1425 			DATA_TYPE_NVLIST, resource, NULL);
1426 		break;
1427 
1428 	case OPL_ERRID_PATH:
1429 		/*
1430 		 * No resource is created but the cpumem DE will find
1431 		 * the defective path by retreiving EID from SFSR which is
1432 		 * included in the payload.
1433 		 */
1434 		break;
1435 	}
1436 }
1437 
1438 /*
1439  * Returns whether fault address is valid for this error bit and
1440  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
1441  */
1442 /*ARGSUSED*/
1443 static int
1444 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
1445 {
1446 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1447 
1448 	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
1449 		return ((t_afsr_bit & SFSR_MEMORY) &&
1450 		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
1451 	}
1452 	return (0);
1453 }
1454 
/*
 * Slave side of STICK synchronization.  On OPL the SCF performs the
 * synchronization, so this is an intentional no-op.
 */
void
sticksync_slave(void)
{
}
1462 
/*
 * Master side of STICK synchronization.  On OPL the SCF performs the
 * synchronization, so this is an intentional no-op.
 */
void
sticksync_master(void)
{
}
1470 
1471 /*
1472  * Cpu private unitialization.  OPL cpus do not use the private area.
1473  */
1474 void
1475 cpu_uninit_private(struct cpu *cp)
1476 {
1477 	cmp_delete_cpu(cp->cpu_id);
1478 }
1479 
/*
 * Error-path cache flush.  No attempt is made to flush selectively: the
 * entire ecache is always flushed via cpu_flush_ecache().
 */
void
cpu_error_ecache_flush(void)
{
	cpu_flush_ecache();
}
1488 
1489 void
1490 cpu_ereport_post(struct async_flt *aflt)
1491 {
1492 	char *cpu_type, buf[FM_MAX_CLASS];
1493 	nv_alloc_t *nva = NULL;
1494 	nvlist_t *ereport, *detector, *resource;
1495 	errorq_elem_t *eqep;
1496 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1497 
1498 	if (aflt->flt_panic || panicstr) {
1499 		eqep = errorq_reserve(ereport_errorq);
1500 		if (eqep == NULL)
1501 			return;
1502 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
1503 		nva = errorq_elem_nva(ereport_errorq, eqep);
1504 	} else {
1505 		ereport = fm_nvlist_create(nva);
1506 	}
1507 
1508 	/*
1509 	 * Create the scheme "cpu" FMRI.
1510 	 */
1511 	detector = fm_nvlist_create(nva);
1512 	resource = fm_nvlist_create(nva);
1513 	switch (cpunodes[aflt->flt_inst].implementation) {
1514 	case OLYMPUS_C_IMPL:
1515 		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
1516 		break;
1517 	default:
1518 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
1519 		break;
1520 	}
1521 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1522 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
1523 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
1524 	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
1525 	    sbuf);
1526 
1527 	/*
1528 	 * Encode all the common data into the ereport.
1529 	 */
1530 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
1531 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
1532 
1533 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
1534 	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
1535 
1536 	/*
1537 	 * Encode the error specific data that was saved in
1538 	 * the async_flt structure into the ereport.
1539 	 */
1540 	cpu_payload_add_aflt(aflt, ereport, resource);
1541 
1542 	if (aflt->flt_panic || panicstr) {
1543 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1544 	} else {
1545 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1546 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1547 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1548 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1549 	}
1550 }
1551 
1552 void
1553 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
1554 {
1555 	int status;
1556 	ddi_fm_error_t de;
1557 
1558 	bzero(&de, sizeof (ddi_fm_error_t));
1559 
1560 	de.fme_version = DDI_FME_VERSION;
1561 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
1562 	de.fme_flag = expected;
1563 	de.fme_bus_specific = (void *)aflt->flt_addr;
1564 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
1565 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
1566 		aflt->flt_panic = 1;
1567 }
1568 
1569 void
1570 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
1571     errorq_t *eqp, uint_t flag)
1572 {
1573 	struct async_flt *aflt = (struct async_flt *)payload;
1574 
1575 	aflt->flt_erpt_class = error_class;
1576 	errorq_dispatch(eqp, payload, payload_sz, flag);
1577 }
1578 
1579 void
1580 adjust_hw_copy_limits(int ecache_size)
1581 {
1582 	/*
1583 	 * Set hw copy limits.
1584 	 *
1585 	 * /etc/system will be parsed later and can override one or more
1586 	 * of these settings.
1587 	 *
1588 	 * At this time, ecache size seems only mildly relevant.
1589 	 * We seem to run into issues with the d-cache and stalls
1590 	 * we see on misses.
1591 	 *
1592 	 * Cycle measurement indicates that 2 byte aligned copies fare
1593 	 * little better than doing things with VIS at around 512 bytes.
1594 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
1595 	 * aligned is faster whenever the source and destination data
1596 	 * in cache and the total size is less than 2 Kbytes.  The 2K
1597 	 * limit seems to be driven by the 2K write cache.
1598 	 * When more than 2K of copies are done in non-VIS mode, stores
1599 	 * backup in the write cache.  In VIS mode, the write cache is
1600 	 * bypassed, allowing faster cache-line writes aligned on cache
1601 	 * boundaries.
1602 	 *
1603 	 * In addition, in non-VIS mode, there is no prefetching, so
1604 	 * for larger copies, the advantage of prefetching to avoid even
1605 	 * occasional cache misses is enough to justify using the VIS code.
1606 	 *
1607 	 * During testing, it was discovered that netbench ran 3% slower
1608 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
1609 	 * applications, data is only used once (copied to the output
1610 	 * buffer, then copied by the network device off the system).  Using
1611 	 * the VIS copy saves more L2 cache state.  Network copies are
1612 	 * around 1.3K to 1.5K in size for historical reasons.
1613 	 *
1614 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
1615 	 * aligned copy even for large caches and 8 MB ecache.  The
1616 	 * infrastructure to allow different limits for different sized
1617 	 * caches is kept to allow further tuning in later releases.
1618 	 */
1619 
1620 	if (min_ecache_size == 0 && use_hw_bcopy) {
1621 		/*
1622 		 * First time through - should be before /etc/system
1623 		 * is read.
1624 		 * Could skip the checks for zero but this lets us
1625 		 * preserve any debugger rewrites.
1626 		 */
1627 		if (hw_copy_limit_1 == 0) {
1628 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
1629 			priv_hcl_1 = hw_copy_limit_1;
1630 		}
1631 		if (hw_copy_limit_2 == 0) {
1632 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
1633 			priv_hcl_2 = hw_copy_limit_2;
1634 		}
1635 		if (hw_copy_limit_4 == 0) {
1636 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
1637 			priv_hcl_4 = hw_copy_limit_4;
1638 		}
1639 		if (hw_copy_limit_8 == 0) {
1640 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
1641 			priv_hcl_8 = hw_copy_limit_8;
1642 		}
1643 		min_ecache_size = ecache_size;
1644 	} else {
1645 		/*
1646 		 * MP initialization. Called *after* /etc/system has
1647 		 * been parsed. One CPU has already been initialized.
1648 		 * Need to cater for /etc/system having scragged one
1649 		 * of our values.
1650 		 */
1651 		if (ecache_size == min_ecache_size) {
1652 			/*
1653 			 * Same size ecache. We do nothing unless we
1654 			 * have a pessimistic ecache setting. In that
1655 			 * case we become more optimistic (if the cache is
1656 			 * large enough).
1657 			 */
1658 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
1659 				/*
1660 				 * Need to adjust hw_copy_limit* from our
1661 				 * pessimistic uniprocessor value to a more
1662 				 * optimistic UP value *iff* it hasn't been
1663 				 * reset.
1664 				 */
1665 				if ((ecache_size > 1048576) &&
1666 				    (priv_hcl_8 == hw_copy_limit_8)) {
1667 					if (ecache_size <= 2097152)
1668 						hw_copy_limit_8 = 4 *
1669 						    VIS_COPY_THRESHOLD;
1670 					else if (ecache_size <= 4194304)
1671 						hw_copy_limit_8 = 4 *
1672 						    VIS_COPY_THRESHOLD;
1673 					else
1674 						hw_copy_limit_8 = 4 *
1675 						    VIS_COPY_THRESHOLD;
1676 					priv_hcl_8 = hw_copy_limit_8;
1677 				}
1678 			}
1679 		} else if (ecache_size < min_ecache_size) {
1680 			/*
1681 			 * A different ecache size. Can this even happen?
1682 			 */
1683 			if (priv_hcl_8 == hw_copy_limit_8) {
1684 				/*
1685 				 * The previous value that we set
1686 				 * is unchanged (i.e., it hasn't been
1687 				 * scragged by /etc/system). Rewrite it.
1688 				 */
1689 				if (ecache_size <= 1048576)
1690 					hw_copy_limit_8 = 8 *
1691 					    VIS_COPY_THRESHOLD;
1692 				else if (ecache_size <= 2097152)
1693 					hw_copy_limit_8 = 8 *
1694 					    VIS_COPY_THRESHOLD;
1695 				else if (ecache_size <= 4194304)
1696 					hw_copy_limit_8 = 8 *
1697 					    VIS_COPY_THRESHOLD;
1698 				else
1699 					hw_copy_limit_8 = 10 *
1700 					    VIS_COPY_THRESHOLD;
1701 				priv_hcl_8 = hw_copy_limit_8;
1702 				min_ecache_size = ecache_size;
1703 			}
1704 		}
1705 	}
1706 }
1707 
1708 #define	VIS_BLOCKSIZE		64
1709 
1710 int
1711 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1712 {
1713 	int ret, watched;
1714 
1715 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1716 	ret = dtrace_blksuword32(addr, data, 0);
1717 	if (watched)
1718 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1719 
1720 	return (ret);
1721 }
1722 
1723 void
1724 opl_cpu_reg_init()
1725 {
1726 	uint64_t	this_cpu_log;
1727 
1728 	/*
1729 	 * We do not need to re-initialize cpu0 registers.
1730 	 */
1731 	if (cpu[getprocessorid()] == &cpu0)
1732 		return;
1733 
1734 	/*
1735 	 * Initialize Error log Scratch register for error handling.
1736 	 */
1737 
1738 	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
1739 		ERRLOG_BUFSZ * (getprocessorid())));
1740 	opl_error_setup(this_cpu_log);
1741 
1742 	/*
1743 	 * Enable MMU translating multiple page sizes for
1744 	 * sITLB and sDTLB.
1745 	 */
1746 	opl_mpg_enable();
1747 }
1748 
1749 /*
1750  * Queue one event in ue_queue based on ecc_type_to_info entry.
1751  */
1752 static void
1753 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
1754     ecc_type_to_info_t *eccp)
1755 {
1756 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1757 
1758 	if (reason &&
1759 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
1760 		(void) strcat(reason, eccp->ec_reason);
1761 	}
1762 
1763 	opl_flt->flt_bit = eccp->ec_afsr_bit;
1764 	opl_flt->flt_type = eccp->ec_flt_type;
1765 	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
1766 	aflt->flt_payload = eccp->ec_err_payload;
1767 
1768 	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
1769 	cpu_errorq_dispatch(eccp->ec_err_class,
1770 		(void *)opl_flt, sizeof (opl_async_flt_t),
1771 		ue_queue,
1772 		aflt->flt_panic);
1773 }
1774 
1775 /*
1776  * Queue events on async event queue one event per error bit.
1777  * Return number of events queued.
1778  */
1779 int
1780 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
1781 {
1782 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1783 	ecc_type_to_info_t *eccp;
1784 	int nevents = 0;
1785 
1786 	/*
1787 	 * Queue expected errors, error bit and fault type must must match
1788 	 * in the ecc_type_to_info table.
1789 	 */
1790 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
1791 		eccp++) {
1792 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
1793 		    (eccp->ec_flags & aflt->flt_status) != 0) {
1794 			/*
1795 			 * UE error event can be further
1796 			 * classified/breakdown into finer granularity
1797 			 * based on the flt_eid_mod value set by HW.  We do
1798 			 * special handling here so that we can report UE
1799 			 * error in finer granularity as ue_mem,
1800 			 * ue_channel, ue_cpu or ue_path.
1801 			 */
1802 			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
1803 				opl_flt->flt_eid_mod =
1804 					(aflt->flt_stat & SFSR_EID_MOD)
1805 					>> SFSR_EID_MOD_SHIFT;
1806 				opl_flt->flt_eid_sid =
1807 					(aflt->flt_stat & SFSR_EID_SID)
1808 					>> SFSR_EID_SID_SHIFT;
1809 				/*
1810 				 * Need to advance eccp pointer by flt_eid_mod
1811 				 * so that we get an appropriate ecc pointer
1812 				 *
1813 				 * EID			# of advances
1814 				 * ----------------------------------
1815 				 * OPL_ERRID_MEM	0
1816 				 * OPL_ERRID_CHANNEL	1
1817 				 * OPL_ERRID_CPU	2
1818 				 * OPL_ERRID_PATH	3
1819 				 */
1820 				eccp += opl_flt->flt_eid_mod;
1821 			}
1822 			cpu_queue_one_event(opl_flt, reason, eccp);
1823 			t_afsr_errs &= ~eccp->ec_afsr_bit;
1824 			nevents++;
1825 		}
1826 	}
1827 
1828 	return (nevents);
1829 }
1830 
1831 /*
1832  * Sync. error wrapper functions.
1833  * We use these functions in order to transfer here from the
1834  * nucleus trap handler information about trap type (data or
1835  * instruction) and trap level (0 or above 0). This way we
1836  * get rid of using SFSR's reserved bits.
1837  */
1838 
1839 #define	OPL_SYNC_TL0	0
1840 #define	OPL_SYNC_TL1	1
1841 #define	OPL_ISYNC_ERR	0
1842 #define	OPL_DSYNC_ERR	1
1843 
1844 void
1845 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1846 {
1847 	uint64_t t_sfar = p_sfar;
1848 	uint64_t t_sfsr = p_sfsr;
1849 
1850 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1851 	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
1852 }
1853 
1854 void
1855 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1856 {
1857 	uint64_t t_sfar = p_sfar;
1858 	uint64_t t_sfsr = p_sfsr;
1859 
1860 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1861 	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
1862 }
1863 
1864 void
1865 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1866 {
1867 	uint64_t t_sfar = p_sfar;
1868 	uint64_t t_sfsr = p_sfsr;
1869 
1870 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1871 	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
1872 }
1873 
1874 void
1875 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1876 {
1877 	uint64_t t_sfar = p_sfar;
1878 	uint64_t t_sfsr = p_sfsr;
1879 
1880 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1881 	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
1882 }
1883 
1884 /*
1885  * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
1886  * and TLB_PRT.
1887  * This function is designed based on cpu_deferred_error().
1888  */
1889 
1890 static void
1891 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
1892     uint_t tl, uint_t derr)
1893 {
1894 	opl_async_flt_t opl_flt;
1895 	struct async_flt *aflt;
1896 	int trampolined = 0;
1897 	char pr_reason[MAX_REASON_STRING];
1898 	uint64_t log_sfsr;
1899 	int expected = DDI_FM_ERR_UNEXPECTED;
1900 	ddi_acc_hdl_t *hp;
1901 
1902 	/*
1903 	 * We need to look at p_flag to determine if the thread detected an
1904 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1905 	 * because we just need a consistent snapshot and we know that everyone
1906 	 * else will store a consistent set of bits while holding p_lock.  We
1907 	 * don't have to worry about a race because SDOCORE is set once prior
1908 	 * to doing i/o from the process's address space and is never cleared.
1909 	 */
1910 	uint_t pflag = ttoproc(curthread)->p_flag;
1911 
1912 	pr_reason[0] = '\0';
1913 
1914 	/*
1915 	 * handle the specific error
1916 	 */
1917 	bzero(&opl_flt, sizeof (opl_async_flt_t));
1918 	aflt = (struct async_flt *)&opl_flt;
1919 	aflt->flt_id = gethrtime_waitfree();
1920 	aflt->flt_bus_id = getprocessorid();
1921 	aflt->flt_inst = CPU->cpu_id;
1922 	aflt->flt_stat = t_sfsr;
1923 	aflt->flt_addr = t_sfar;
1924 	aflt->flt_pc = (caddr_t)rp->r_pc;
1925 	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
1926 	aflt->flt_class = (uchar_t)CPU_FAULT;
1927 	aflt->flt_priv = (uchar_t)
1928 		(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?  1 : 0));
1929 	aflt->flt_tl = (uchar_t)tl;
1930 	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
1931 	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
1932 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1933 	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;
1934 
1935 	/*
1936 	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
1937 	 * So, clear all error bits to avoid mis-handling and force the system
1938 	 * panicked.
1939 	 * We skip all the procedures below down to the panic message call.
1940 	 */
1941 	if (!(t_sfsr & SFSR_FV)) {
1942 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
1943 		aflt->flt_panic = 1;
1944 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
1945 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
1946 			(void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
1947 			aflt->flt_panic);
1948 		fm_panic("%sErrors(s)", "invalid SFSR");
1949 	}
1950 
1951 	/*
1952 	 * If either UE and MK bit is off, this is not valid UE error.
1953 	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
1954 	 * mis-handling below.
1955 	 * aflt->flt_stat keeps the original bits as a reference.
1956 	 */
1957 	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
1958 	    (SFSR_MK_UE|SFSR_UE)) {
1959 		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
1960 	}
1961 
1962 	/*
1963 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1964 	 * see if we were executing in the kernel under on_trap() or t_lofault
1965 	 * protection.  If so, modify the saved registers so that we return
1966 	 * from the trap to the appropriate trampoline routine.
1967 	 */
1968 	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
1969 		if (curthread->t_ontrap != NULL) {
1970 			on_trap_data_t *otp = curthread->t_ontrap;
1971 
1972 			if (otp->ot_prot & OT_DATA_EC) {
1973 				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
1974 				otp->ot_trap |= (ushort_t)OT_DATA_EC;
1975 				rp->r_pc = otp->ot_trampoline;
1976 				rp->r_npc = rp->r_pc + 4;
1977 				trampolined = 1;
1978 			}
1979 
1980 			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
1981 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1982 				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
1983 				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
1984 				rp->r_pc = otp->ot_trampoline;
1985 				rp->r_npc = rp->r_pc + 4;
1986 				trampolined = 1;
1987 				/*
1988 				 * for peeks and caut_gets errors are expected
1989 				 */
1990 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1991 				if (!hp)
1992 					expected = DDI_FM_ERR_PEEK;
1993 				else if (hp->ah_acc.devacc_attr_access ==
1994 				    DDI_CAUTIOUS_ACC)
1995 					expected = DDI_FM_ERR_EXPECTED;
1996 			}
1997 
1998 		} else if (curthread->t_lofault) {
1999 			aflt->flt_prot = AFLT_PROT_COPY;
2000 			rp->r_g1 = EFAULT;
2001 			rp->r_pc = curthread->t_lofault;
2002 			rp->r_npc = rp->r_pc + 4;
2003 			trampolined = 1;
2004 		}
2005 	}
2006 
2007 	/*
2008 	 * If we're in user mode or we're doing a protected copy, we either
2009 	 * want the ASTON code below to send a signal to the user process
2010 	 * or we want to panic if aft_panic is set.
2011 	 *
2012 	 * If we're in privileged mode and we're not doing a copy, then we
2013 	 * need to check if we've trampolined.  If we haven't trampolined,
2014 	 * we should panic.
2015 	 */
2016 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2017 		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
2018 			aflt->flt_panic |= aft_panic;
2019 	} else if (!trampolined) {
2020 		aflt->flt_panic = 1;
2021 	}
2022 
2023 	/*
2024 	 * If we've trampolined due to a privileged TO or BERR, or if an
2025 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
2026 	 * event for that TO or BERR.  Queue all other events (if any) besides
2027 	 * the TO/BERR.
2028 	 */
2029 	log_sfsr = t_sfsr;
2030 	if (trampolined) {
2031 		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
2032 	} else if (!aflt->flt_priv) {
2033 		/*
2034 		 * User mode, suppress messages if
2035 		 * cpu_berr_to_verbose is not set.
2036 		 */
2037 		if (!cpu_berr_to_verbose)
2038 			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
2039 	}
2040 
2041 	if (((log_sfsr & SFSR_ERRS) &&
2042 		(cpu_queue_events(&opl_flt, pr_reason, t_sfsr) == 0)) ||
2043 	    ((t_sfsr & SFSR_ERRS) == 0)) {
2044 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
2045 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
2046 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
2047 			(void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
2048 			aflt->flt_panic);
2049 	}
2050 
2051 	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
2052 		cpu_run_bus_error_handlers(aflt, expected);
2053 	}
2054 
2055 	/*
2056 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2057 	 * be logged as part of the panic flow.
2058 	 */
2059 	if (aflt->flt_panic) {
2060 		if (pr_reason[0] == 0)
2061 			strcpy(pr_reason, "invalid SFSR ");
2062 
2063 		fm_panic("%sErrors(s)", pr_reason);
2064 	}
2065 
2066 	/*
2067 	 * If we queued an error and we are going to return from the trap and
2068 	 * the error was in user mode or inside of a copy routine, set AST flag
2069 	 * so the queue will be drained before returning to user mode.  The
2070 	 * AST processing will also act on our failure policy.
2071 	 */
2072 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2073 		int pcb_flag = 0;
2074 
2075 		if (t_sfsr & (SFSR_ERRS &
2076 			~(SFSR_BERR | SFSR_TO)))
2077 			pcb_flag |= ASYNC_HWERR;
2078 
2079 		if (t_sfsr & SFSR_BERR)
2080 			pcb_flag |= ASYNC_BERR;
2081 
2082 		if (t_sfsr & SFSR_TO)
2083 			pcb_flag |= ASYNC_BTO;
2084 
2085 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2086 		aston(curthread);
2087 	}
2088 }
2089 
2090 /*ARGSUSED*/
2091 void
2092 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
2093 {
2094 	opl_async_flt_t opl_flt;
2095 	struct async_flt *aflt;
2096 	char pr_reason[MAX_REASON_STRING];
2097 
2098 	/* normalize tl */
2099 	tl = (tl >= 2 ? 1 : 0);
2100 	pr_reason[0] = '\0';
2101 
2102 	bzero(&opl_flt, sizeof (opl_async_flt_t));
2103 	aflt = (struct async_flt *)&opl_flt;
2104 	aflt->flt_id = gethrtime_waitfree();
2105 	aflt->flt_bus_id = getprocessorid();
2106 	aflt->flt_inst = CPU->cpu_id;
2107 	aflt->flt_stat = p_ugesr;
2108 	aflt->flt_pc = (caddr_t)rp->r_pc;
2109 	aflt->flt_class = (uchar_t)CPU_FAULT;
2110 	aflt->flt_tl = tl;
2111 	aflt->flt_priv = (uchar_t)
2112 		(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?  1 : 0));
2113 	aflt->flt_status = OPL_ECC_URGENT_TRAP;
2114 	aflt->flt_panic = 1;
2115 	/*
2116 	 * HW does not set mod/sid in case of urgent error.
2117 	 * So we have to set it here.
2118 	 */
2119 	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
2120 	opl_flt.flt_eid_sid = aflt->flt_inst;
2121 
2122 	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
2123 		opl_flt.flt_type = OPL_CPU_INV_UGESR;
2124 		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
2125 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG,
2126 			(void *)&opl_flt, sizeof (opl_async_flt_t),
2127 			ue_queue, aflt->flt_panic);
2128 	}
2129 
2130 	fm_panic("Urgent Error");
2131 }
2132 
2133 /*
2134  * Initialization error counters resetting.
2135  */
2136 /* ARGSUSED */
2137 static void
2138 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
2139 {
2140 	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
2141 	hdlr->cyh_level = CY_LOW_LEVEL;
2142 	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
2143 
2144 	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
2145 	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
2146 }
2147 
2148 void
2149 cpu_mp_init(void)
2150 {
2151 	cyc_omni_handler_t hdlr;
2152 
2153 	hdlr.cyo_online = opl_ras_online;
2154 	hdlr.cyo_offline = NULL;
2155 	hdlr.cyo_arg = NULL;
2156 	mutex_enter(&cpu_lock);
2157 	(void) cyclic_add_omni(&hdlr);
2158 	mutex_exit(&cpu_lock);
2159 }
2160 
/*
 * Kernel page size initialization hook.  This implementation does
 * nothing with the hat it is given.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
	/* nothing to do */
}
2166 
2167 size_t
2168 mmu_get_kernel_lpsize(size_t lpsize)
2169 {
2170 	uint_t tte;
2171 
2172 	if (lpsize == 0) {
2173 		/* no setting for segkmem_lpsize in /etc/system: use default */
2174 		return (MMU_PAGESIZE4M);
2175 	}
2176 
2177 	for (tte = TTE8K; tte <= TTE4M; tte++) {
2178 		if (lpsize == TTEBYTES(tte))
2179 			return (lpsize);
2180 	}
2181 
2182 	return (TTEBYTES(TTE8K));
2183 }
2184 
/*
 * The following functions are unused in the OPL cpu module.
 * They are defined here only to resolve dependencies in the
 * "unix" module.  Unused functions that should never be
 * called on OPL are coded with ASSERT(0).
 */
2192 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
void
cpu_disable_errors(void)
{
	/* nothing to do */
}
2196 
/*
 * Unused in the OPL cpu module and never expected to be called;
 * any call trips ASSERT(0).
 */
void
cpu_enable_errors(void)
{
	ASSERT(0);
}
2200 
2201 /*ARGSUSED*/
2202 void
2203 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
2204 { ASSERT(0); }
2205 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
	/* nothing to do */
}
2210 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
	/* nothing to do */
}
2215 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
	/* nothing to do */
}
2220 
2221 /*ARGSUSED*/
2222 void
2223 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
2224 { ASSERT(0); }
2225 
2226 /*ARGSUSED*/
2227 void
2228 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
2229 { ASSERT(0); }
2230 
/*
 * Unused in the OPL cpu module and never expected to be called;
 * any call trips ASSERT(0).
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	ASSERT(0);
}
2235 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
	/* nothing to do */
}
2240 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
	/* nothing to do */
}
2245 
/*
 * Unused in the OPL cpu module and never expected to be called;
 * any call trips ASSERT(0).
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{
	ASSERT(0);
}
2250 
/*
 * No-op in the OPL cpu module; defined to resolve a dependency of
 * the "unix" module.
 */
void
cpu_init_cache_scrub(void)
{
	/* nothing to do */
}
2254 
2255 /* ARGSUSED */
2256 int
2257 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
2258 {
2259 	if (&plat_get_mem_sid) {
2260 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
2261 	} else {
2262 		return (ENOTSUP);
2263 	}
2264 }
2265 
2266 /* ARGSUSED */
2267 int
2268 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
2269 {
2270 	if (&plat_get_mem_addr) {
2271 		return (plat_get_mem_addr(unum, sid, offset, addrp));
2272 	} else {
2273 		return (ENOTSUP);
2274 	}
2275 }
2276 
2277 /* ARGSUSED */
2278 int
2279 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
2280 {
2281 	if (&plat_get_mem_offset) {
2282 		return (plat_get_mem_offset(flt_addr, offp));
2283 	} else {
2284 		return (ENOTSUP);
2285 	}
2286 }
2287 
2288 /*ARGSUSED*/
2289 void
2290 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2291 { ASSERT(0); }
2292 
2293 /*ARGSUSED*/
2294 void
2295 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2296 { ASSERT(0); }
2297 
/*
 * Unused in the OPL cpu module and never expected to be called;
 * any call trips ASSERT(0).
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
	ASSERT(0);
}
2302 
2303 /*ARGSUSED*/
2304 int
2305 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
2306     errorq_elem_t *eqep, size_t afltoffset)
2307 {
2308 	ASSERT(0);
2309 	return (0);
2310 }
2311 
/*
 * Unused in the OPL cpu module and never expected to be called;
 * any call trips ASSERT(0).  The function returns NULL after the
 * assertion.
 */
/*ARGSUSED*/
char *
flt_to_error_type(struct async_flt *aflt)
{
	ASSERT(0);

	return (NULL);
}
2319