xref: /titanic_52/usr/src/uts/sun4u/cpu/opl_olympus.c (revision 13a6f743b0f1212958d784a348b0129b4bfe9046)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/opl_olympus_regs.h>
45 #include <sys/opl_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/cpu_module.h>
51 #include <sys/sysmacros.h>
52 #include <sys/intreg.h>
53 #include <sys/clock.h>
54 #include <sys/platform_module.h>
55 #include <sys/ontrap.h>
56 #include <sys/panic.h>
57 #include <sys/memlist.h>
58 #include <sys/ndifm.h>
59 #include <sys/ddifm.h>
60 #include <sys/fm/protocol.h>
61 #include <sys/fm/util.h>
62 #include <sys/fm/cpu/SPARC64-VI.h>
63 #include <sys/dtrace.h>
64 #include <sys/watchpoint.h>
65 #include <sys/promif.h>
66 
67 /*
68  * Internal functions.
69  */
70 static int cpu_sync_log_err(void *flt);
71 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
72 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
73 static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
74 
75 /*
76  * Error counters resetting interval.
77  */
78 static int opl_async_check_interval = 60;		/* 1 min */
79 
80 /*
81  * Maximum number of contexts for Olympus-C.
82  */
83 #define	MAX_NCTXS	(1 << 13)
84 
85 /* Will be set !NULL for SPARC64-VI and derivatives. */
86 static uchar_t ctx_pgsz_arr[MAX_NCTXS];
87 uchar_t *ctx_pgsz_array = ctx_pgsz_arr;
88 
89 /*
90  * PA[22:0] represent Displacement in Jupiter
91  * configuration space.
92  */
93 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
94 
95 /*
96  * set in /etc/system to control logging of user BERR/TO's
97  */
98 int cpu_berr_to_verbose = 0;
99 
100 static int min_ecache_size;
101 static uint_t priv_hcl_1;
102 static uint_t priv_hcl_2;
103 static uint_t priv_hcl_4;
104 static uint_t priv_hcl_8;
105 
106 /*
107  * Olympus error log
108  */
109 static opl_errlog_t	*opl_err_log;
110 
/*
 * Table mapping hardware error-status bits to their trap class, CPU fault
 * type, message text, and FMA ereport payload/class names.
 *
 * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
 * No other ecc_type_to_info entry may be inserted in between the following
 * four UE classes.
 */
ecc_type_to_info_t ecc_type_to_info[] = {
	/* Synchronous (SFSR) errors. */
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_MEM,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CHANNEL,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_CPU,
	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_UE_PATH,
	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BERR,
	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_BTO,
	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_MTLB,
	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
	FM_EREPORT_CPU_TLBP,

	/* Urgent (UGESR) errors. */
	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CRE,
	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBCTX,
	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSBP,
	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_PSTATE,
	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_TSTATE,
	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_F,
	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IUG_R,
	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_SDC,
	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_WDT,
	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DTLB,
	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_ITLB,
	UGESR_IUG_COREERR, "IUG_COREERR",
	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_CORE,
	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_DAE,
	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_IAE,
	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
	FM_EREPORT_CPU_UGE,
	/* Table terminator. */
	0,		NULL,		0,		0,
	NULL,  0,	   0,
};
192 
193 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
194 		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
195 		int *segsp, int *banksp, int *mcidp);
196 
197 
/*
 * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
 * Installs the OPL-specific handlers for both TL0 (tt0_*) and
 * TL1 (tt1_*) versions of each trap.
 */
void
cpu_init_trap(void)
{
	/* Instruction/data access error traps -> sync error handler. */
	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
	/* async_data_error traps -> urgent error handler. */
	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
}
211 
212 static int
213 getintprop(pnode_t node, char *name, int deflt)
214 {
215 	int	value;
216 
217 	switch (prom_getproplen(node, name)) {
218 	case sizeof (int):
219 		(void) prom_getprop(node, name, (caddr_t)&value);
220 		break;
221 
222 	default:
223 		value = deflt;
224 		break;
225 	}
226 
227 	return (value);
228 }
229 
230 /*
231  * Set the magic constants of the implementation.
232  */
233 /*ARGSUSED*/
234 void
235 cpu_fiximp(pnode_t dnode)
236 {
237 	int i, a;
238 	extern int vac_size, vac_shift;
239 	extern uint_t vac_mask;
240 
241 	static struct {
242 		char	*name;
243 		int	*var;
244 		int	defval;
245 	} prop[] = {
246 		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
247 		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
248 		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
249 		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
250 		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
251 		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
252 		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
253 	};
254 
255 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
256 		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
257 
258 	ecache_setsize = ecache_size / ecache_associativity;
259 
260 	vac_size = OPL_VAC_SIZE;
261 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
262 	i = 0; a = vac_size;
263 	while (a >>= 1)
264 		++i;
265 	vac_shift = i;
266 	shm_alignment = vac_size;
267 	vac = 1;
268 }
269 
270 #ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
271 /*
272  * Quick and dirty way to redefine locally in
273  * OPL the value of IDSR_BN_SETS to 31 instead
274  * of the standard 32 value. This is to workaround
275  * REV_B of Olympus_c processor's problem in handling
276  * more than 31 xcall broadcast.
277  */
278 #undef	IDSR_BN_SETS
279 #define	IDSR_BN_SETS    31
280 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
281 
/*
 * Deliver a mondo interrupt to every CPU in "set".  Mondos are shipped
 * to up to IDSR_BN_SETS targets at a time, then the interrupt dispatch
 * status register (IDSR) is polled: BUSY targets are waited on, NACKed
 * targets are re-shipped, and remaining CPUs (when NCPU > IDSR_BN_SETS)
 * are shipped as dispatch slots free up.  A target that never accepts
 * within xc_tick_limit causes a panic (unless a panic is already in
 * progress).  Olympus-C rev A/B errata workarounds are compiled in via
 * the OLYMPUS_C_REV_[AB]_ERRATA_XCALL ifdefs.
 */
void
send_mondo_set(cpuset_t set)
{
	int lo, busy, nack, shipped = 0;
	uint16_t i, cpuids[IDSR_BN_SETS];
	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
	uint64_t starttick, endtick, tick, lasttick;
#if (NCPU > IDSR_BN_SETS)
	int index = 0;
	int ncpuids = 0;
#endif
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	int bn_sets = IDSR_BN_SETS;
	uint64_t ver;

	ASSERT(NCPU > bn_sets);
#endif

	ASSERT(!CPUSET_ISNULL(set));
	starttick = lasttick = gettick();

#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	/* Rev A of Olympus-C can only handle one outstanding broadcast. */
	ver = ultra_getver();
	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
		((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
		bn_sets = 1;
#endif

#if (NCPU <= IDSR_BN_SETS)
	/* Every target fits in one dispatch: ship to all of them now. */
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			shipit(i, shipped);
			nackmask |= IDSR_NACK_BIT(shipped);
			cpuids[shipped++] = i;
			CPUSET_DEL(set, i);
			if (CPUSET_ISNULL(set))
				break;
		}
	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
#else
	for (i = 0; i < NCPU; i++)
		if (CPU_IN_SET(set, i)) {
			ncpuids++;

			/*
			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
			 * find we have shipped to more than (IDSR_BN_SETS)
			 * CPUs, set "index" to the highest numbered CPU in
			 * the set so we can ship to other CPUs a bit later on.
			 */
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			if (shipped < bn_sets) {
#else
			if (shipped < IDSR_BN_SETS) {
#endif
				shipit(i, shipped);
				nackmask |= IDSR_NACK_BIT(shipped);
				cpuids[shipped++] = i;
				CPUSET_DEL(set, i);
				if (CPUSET_ISNULL(set))
					break;
			} else
				index = (int)i;
		}

	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
#endif

	busymask = IDSR_NACK_TO_BUSY(nackmask);
	busy = nack = 0;
	endtick = starttick + xc_tick_limit;
	/* Poll the IDSR until every shipped mondo has been accepted. */
	for (;;) {
		idsr = getidsr();
#if (NCPU <= IDSR_BN_SETS)
		if (idsr == 0)
			break;
#else
		if (idsr == 0 && shipped == ncpuids)
			break;
#endif
		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			/* Report which targets are still pending, then panic. */
			cmn_err(CE_CONT, "send mondo timeout "
				"[%d NACK %d BUSY]\nIDSR 0x%"
				"" PRIx64 "  cpuids:", nack, busy, idsr);
#ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
			for (i = 0; i < bn_sets; i++) {
#else
			for (i = 0; i < IDSR_BN_SETS; i++) {
#endif
				if (idsr & (IDSR_NACK_BIT(i) |
				    IDSR_BUSY_BIT(i))) {
					cmn_err(CE_CONT, " 0x%x",
						cpuids[i]);
				}
			}
			cmn_err(CE_CONT, "\n");
			cmn_err(CE_PANIC, "send_mondo_set: timeout");
		}
		curnack = idsr & nackmask;
		curbusy = idsr & busymask;

#ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
		/*
		 * Only proceed to send more xcalls if all the
		 * cpus in the previous IDSR_BN_SETS were completed.
		 */
		if (curbusy) {
			busy++;
			continue;
		}
#endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */

#if (NCPU > IDSR_BN_SETS)
		/* Reuse freed dispatch slots for the not-yet-shipped CPUs. */
		if (shipped < ncpuids) {
			uint64_t cpus_left;
			uint16_t next = (uint16_t)index;

			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
			    busymask;

			if (cpus_left) {
				do {
					/*
					 * Sequence through and ship to the
					 * remainder of the CPUs in the system
					 * (e.g. other than the first
					 * (IDSR_BN_SETS)) in reverse order.
					 */
					lo = lowbit(cpus_left) - 1;
					i = IDSR_BUSY_IDX(lo);
					shipit(next, i);
					shipped++;
					cpuids[i] = next;

					/*
					 * If we've processed all the CPUs,
					 * exit the loop now and save
					 * instructions.
					 */
					if (shipped == ncpuids)
						break;

					for ((index = ((int)next - 1));
						index >= 0; index--)
						if (CPU_IN_SET(set, index)) {
							next = (uint16_t)index;
							break;
						}

					cpus_left &= ~(1ull << lo);
				} while (cpus_left);
				continue;
			}
		}
#endif
#ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
		if (curbusy) {
			busy++;
			continue;
		}
#endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
#ifdef SEND_MONDO_STATS
		{
			int n = gettick() - starttick;
			if (n < 8192)
				x_nack_stimes[n >> 7]++;
		}
#endif
		/* Give targets roughly 1us before re-shipping NACKed mondos. */
		while (gettick() < (tick + sys_clock_mhz))
			;
		do {
			lo = lowbit(curnack) - 1;
			i = IDSR_NACK_IDX(lo);
			shipit(cpuids[i], i);
			curnack &= ~(1ull << lo);
		} while (curnack);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_set_stimes[n >> 7]++;
		else
			x_set_ltimes[(n >> 13) & 0xf]++;
	}
	x_set_cpus[shipped]++;
#endif
}
483 
484 /*
485  * Cpu private initialization.
486  */
487 void
488 cpu_init_private(struct cpu *cp)
489 {
490 	if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) {
491 		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported",
492 			cp->cpu_id, cpunodes[cp->cpu_id].implementation);
493 	}
494 
495 	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
496 }
497 
/*
 * One-time chip-specific setup, run on the boot CPU: error-log scratch
 * register, MMU multi-page-size support, trap handlers, and the various
 * VM/platform tunables appropriate for SPARC64-VI.
 */
void
cpu_setup(void)
{
	extern int at_flags;
	extern int disable_delay_tlb_flush, delay_tlb_flush;
	extern int cpc_has_overflow_intr;
	extern int disable_text_largepages;
	extern int use_text_pgsz4m;
	uint64_t cpu0_log;
	extern	 uint64_t opl_cpu0_err_log;

	/*
	 * Initialize Error log Scratch register for error handling.
	 * The register holds the physical address of cpu0's log buffer.
	 */

	cpu0_log = va_to_pa(&opl_cpu0_err_log);
	opl_error_setup(cpu0_log);

	/*
	 * Enable MMU translating multiple page sizes for
	 * sITLB and sDTLB.
	 */
	opl_mpg_enable();

	/*
	 * Setup chip-specific trap handlers.
	 */
	cpu_init_trap();

	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);

	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;

	/*
	 * Use the maximum number of contexts available for SPARC64-VI
	 * unless it has been tuned for debugging.
	 * We are checking against 0 here since this value can be patched
	 * while booting.  It can not be patched via /etc/system since it
	 * will be patched too late and thus cause the system to panic.
	 */
	if (nctxs == 0)
		nctxs = MAX_NCTXS;

	/*
	 * Due to the number of entries in the fully-associative tlb
	 * this may have to be tuned lower than in spitfire.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et. al. need virtual translations with virtual coloring taken
	 * into consideration.  prefetch/ldd will pollute the d$ on the
	 * load side.
	 */
	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;

	if (use_page_coloring) {
		do_pg_coloring = 1;
		if (use_virtual_coloring)
			do_virtual_coloring = 1;
	}

	/* Instruction-set architectures, most capable first. */
	isa_list =
	    "sparcv9+vis2 sparcv9+vis sparcv9 "
	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;

	/*
	 * On SPARC64-VI, there's no hole in the virtual address space
	 */
	hole_start = hole_end = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */
	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
	kpm_size_shift = 47;
	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
	kpm_smallpages = 1;

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  We have %stick so we can use it.
	 */
	traptrace_use_stick = 1;

	/*
	 * SPARC64-VI has a performance counter overflow interrupt
	 */
	cpc_has_overflow_intr = 1;

	/*
	 * Use SPARC64-VI flush-all support
	 */
	if (!disable_delay_tlb_flush)
		delay_tlb_flush = 1;

	/*
	 * Declare that this architecture/cpu combination does not support
	 * fpRAS.
	 */
	fpras_implemented = 0;

	/*
	 * Enable 4M pages to be used for mapping user text by default.  Don't
	 * use large pages for initialized data segments since we may not know
	 * at exec() time what should be the preferred large page size for DTLB
	 * programming.
	 */
	use_text_pgsz4m = 1;
	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
	    (1 << TTE32M) | (1 << TTE256M);
}
620 
621 /*
622  * Called by setcpudelay
623  */
624 void
625 cpu_init_tick_freq(void)
626 {
627 	/*
628 	 * For SPARC64-VI we want to use the system clock rate as
629 	 * the basis for low level timing, due to support of mixed
630 	 * speed CPUs and power managment.
631 	 */
632 	if (system_clock_freq == 0)
633 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
634 
635 	sys_tick_freq = system_clock_freq;
636 }
637 
638 #ifdef SEND_MONDO_STATS
639 uint32_t x_one_stimes[64];
640 uint32_t x_one_ltimes[16];
641 uint32_t x_set_stimes[64];
642 uint32_t x_set_ltimes[16];
643 uint32_t x_set_cpus[NCPU];
644 uint32_t x_nack_stimes[64];
645 #endif
646 
/*
 * Ship a mondo interrupt to a single CPU and poll the interrupt
 * dispatch status register until the target accepts it, re-shipping
 * on NACK and panicking on timeout.
 *
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
 * in the KDI-specific version.  See us3_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	int busy, nack;
	uint64_t idsr, starttick, endtick, tick, lasttick;
	uint64_t busymask;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	starttick = lasttick = gettick();
	shipit(cpuid, 0);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	busymask = IDSR_BUSY;
	for (;;) {
		idsr = getidsr();
		/* IDSR clear: the target has accepted the mondo. */
		if (idsr == 0)
			break;

		tick = gettick();
		/*
		 * If there is a big jump between the current tick
		 * count and lasttick, we have probably hit a break
		 * point.  Adjust endtick accordingly to avoid panic.
		 */
		if (tick > (lasttick + xc_tick_jump_limit))
			endtick += (tick - lasttick);
		lasttick = tick;
		if (tick > endtick) {
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC, "send mondo timeout "
				"(target 0x%x) [%d NACK %d BUSY]",
					cpuid, nack, busy);
		}

		/* Target still busy with the dispatch: keep polling. */
		if (idsr & busymask) {
			busy++;
			continue;
		}
		/* NACKed: wait ~1us, then re-ship. */
		drv_usecwait(1);
		shipit(cpuid, 0);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	{
		int n = gettick() - starttick;
		if (n < 8192)
			x_one_stimes[n >> 7]++;
		else
			x_one_ltimes[(n >> 13) & 0xf]++;
	}
#endif
}
707 
708 /*
709  * init_mmu_page_sizes is set to one after the bootup time initialization
710  * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
711  * valid value.
712  *
713  * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
714  * versions of disable_ism_large_pages and disable_large_pages, and feed back
715  * into those two hat variables at hat initialization time.
716  *
717  */
718 int init_mmu_page_sizes = 0;
719 static int mmu_disable_ism_large_pages = ((1 << TTE64K) |
720 	(1 << TTE512K) | (1 << TTE256M));
721 static int mmu_disable_large_pages = 0;
722 
723 /*
724  * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
725  * Called during very early bootup from check_cpus_set().
726  * Can be called to verify that mmu_page_sizes are set up correctly.
727  *
728  * Set Olympus defaults. We do not use the function parameter.
729  */
730 /*ARGSUSED*/
731 int
732 mmu_init_mmu_page_sizes(int32_t not_used)
733 {
734 	if (!init_mmu_page_sizes) {
735 		mmu_page_sizes = MMU_PAGE_SIZES;
736 		mmu_hashcnt = MAX_HASHCNT;
737 		mmu_ism_pagesize = MMU_PAGESIZE32M;
738 		mmu_exported_pagesize_mask = (1 << TTE8K) |
739 		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
740 		    (1 << TTE32M) | (1 << TTE256M);
741 		init_mmu_page_sizes = 1;
742 		return (0);
743 	}
744 	return (1);
745 }
746 
747 /* SPARC64-VI worst case DTLB parameters */
748 #ifndef	LOCKED_DTLB_ENTRIES
749 #define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
750 #endif
751 #define	TOTAL_DTLB_ENTRIES	32
752 #define	AVAIL_32M_ENTRIES	0
753 #define	AVAIL_256M_ENTRIES	0
754 #define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
755 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
756 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
757 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
758 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
759 
760 size_t
761 mmu_map_pgsz(size_t pgsize)
762 {
763 	struct proc *p = curproc;
764 	struct as *as = p->p_as;
765 	struct hat *hat = as->a_hat;
766 	uint_t pgsz0, pgsz1;
767 	size_t size0, size1;
768 
769 	ASSERT(mmu_page_sizes == max_mmu_page_sizes);
770 	pgsz0 = hat->sfmmu_pgsz[0];
771 	pgsz1 = hat->sfmmu_pgsz[1];
772 	size0 = hw_page_array[pgsz0].hp_size;
773 	size1 = hw_page_array[pgsz1].hp_size;
774 	/* Allow use of a larger pagesize if neither TLB is reprogrammed. */
775 	if ((pgsz0 == TTE8K) && (pgsz1 == TTE8K)) {
776 		return (pgsize);
777 	/* Allow use of requested pagesize if TLB is reprogrammed to it. */
778 	} else if ((pgsize == size0) || (pgsize == size1)) {
779 		return (pgsize);
780 	/* Use larger reprogrammed TLB size if pgsize is atleast that big. */
781 	} else if (pgsz1 > pgsz0) {
782 		if (pgsize >= size1)
783 			return (size1);
784 	/* Use smaller reprogrammed TLB size if pgsize is atleast that big. */
785 	} else {
786 		if (pgsize >= size0)
787 			return (size0);
788 	}
789 	return (pgsize);
790 }
791 
792 /*
793  * The function returns the mmu-specific values for the
794  * hat's disable_large_pages and disable_ism_large_pages variables.
795  */
796 int
797 mmu_large_pages_disabled(uint_t flag)
798 {
799 	int pages_disable = 0;
800 
801 	if (flag == HAT_LOAD) {
802 		pages_disable =  mmu_disable_large_pages;
803 	} else if (flag == HAT_LOAD_SHARE) {
804 		pages_disable = mmu_disable_ism_large_pages;
805 	}
806 	return (pages_disable);
807 }
808 
809 /*
810  * mmu_init_large_pages is called with the desired ism_pagesize parameter.
811  * It may be called from set_platform_defaults, if some value other than 32M
812  * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
813  * then only warn, since it would be bad form to panic due to a user typo.
814  *
815  * The function re-initializes the mmu_disable_ism_large_pages variable.
816  */
817 void
818 mmu_init_large_pages(size_t ism_pagesize)
819 {
820 	switch (ism_pagesize) {
821 	case MMU_PAGESIZE4M:
822 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
823 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
824 		break;
825 	case MMU_PAGESIZE32M:
826 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
827 		    (1 << TTE512K) | (1 << TTE256M));
828 		break;
829 	case MMU_PAGESIZE256M:
830 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
831 		    (1 << TTE512K) | (1 << TTE32M));
832 		break;
833 	default:
834 		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
835 		    ism_pagesize);
836 		break;
837 	}
838 }
839 
840 /*ARGSUSED*/
841 uint_t
842 mmu_preferred_pgsz(struct hat *hat, caddr_t addr, size_t len)
843 {
844 	sfmmu_t *sfmmup = (sfmmu_t *)hat;
845 	uint_t pgsz0, pgsz1;
846 	uint_t szc, maxszc = mmu_page_sizes - 1;
847 	size_t pgsz;
848 	extern int disable_large_pages;
849 
850 	pgsz0 = (uint_t)sfmmup->sfmmu_pgsz[0];
851 	pgsz1 = (uint_t)sfmmup->sfmmu_pgsz[1];
852 
853 	/*
854 	 * If either of the TLBs are reprogrammed, choose
855 	 * the largest mapping size as the preferred size,
856 	 * if it fits the size and alignment constraints.
857 	 * Else return the largest mapping size that fits,
858 	 * if neither TLB is reprogrammed.
859 	 */
860 	if (pgsz0 > TTE8K || pgsz1 > TTE8K) {
861 		if (pgsz1 > pgsz0) {	/* First try pgsz1 */
862 			pgsz = hw_page_array[pgsz1].hp_size;
863 			if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
864 				return (pgsz1);
865 		}
866 		if (pgsz0 > TTE8K) {	/* Then try pgsz0, if !TTE8K */
867 			pgsz = hw_page_array[pgsz0].hp_size;
868 			if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
869 				return (pgsz0);
870 		}
871 	} else { /* Otherwise pick best fit if neither TLB is reprogrammed. */
872 		for (szc = maxszc; szc > TTE8K; szc--) {
873 			if (disable_large_pages & (1 << szc))
874 				continue;
875 
876 			pgsz = hw_page_array[szc].hp_size;
877 			if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
878 				return (szc);
879 		}
880 	}
881 	return (TTE8K);
882 }
883 
884 /*
885  * Function to reprogram the TLBs when page sizes used
886  * by a process change significantly.
887  */
888 void
889 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt)
890 {
891 	extern int page_szc(size_t);
892 	uint8_t pgsz0, pgsz1;
893 
894 	/*
895 	 * Don't program 2nd dtlb for kernel and ism hat
896 	 */
897 	if (hat->sfmmu_ismhat || hat == ksfmmup)
898 		return;
899 
900 	/*
901 	 * hat->sfmmu_pgsz[] is an array whose elements
902 	 * contain a sorted order of page sizes.  Element
903 	 * 0 is the most commonly used page size, followed
904 	 * by element 1, and so on.
905 	 *
906 	 * ttecnt[] is an array of per-page-size page counts
907 	 * mapped into the process.
908 	 *
909 	 * If the HAT's choice for page sizes is unsuitable,
910 	 * we can override it here.  The new values written
911 	 * to the array will be handed back to us later to
912 	 * do the actual programming of the TLB hardware.
913 	 *
914 	 */
915 	pgsz0 = (uint8_t)MIN(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]);
916 	pgsz1 = (uint8_t)MAX(hat->sfmmu_pgsz[0], hat->sfmmu_pgsz[1]);
917 
918 	/*
919 	 * This implements PAGESIZE programming of the sTLB
920 	 * if large TTE counts don't exceed the thresholds.
921 	 */
922 	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
923 		pgsz0 = page_szc(MMU_PAGESIZE);
924 	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
925 		pgsz1 = page_szc(MMU_PAGESIZE);
926 	hat->sfmmu_pgsz[0] = pgsz0;
927 	hat->sfmmu_pgsz[1] = pgsz1;
928 	/* otherwise, accept what the HAT chose for us */
929 }
930 
931 /*
932  * The HAT calls this function when an MMU context is allocated so that we
933  * can reprogram the large TLBs appropriately for the new process using
934  * the context.
935  *
936  * The caller must hold the HAT lock.
937  */
938 void
939 mmu_set_ctx_page_sizes(struct hat *hat)
940 {
941 	uint8_t pgsz0, pgsz1;
942 	uint8_t new_cext;
943 
944 	ASSERT(sfmmu_hat_lock_held(hat));
945 	/*
946 	 * Don't program 2nd dtlb for kernel and ism hat
947 	 */
948 	if (hat->sfmmu_ismhat || hat == ksfmmup)
949 		return;
950 
951 	/*
952 	 * If supported, reprogram the TLBs to a larger pagesize.
953 	 */
954 	pgsz0 = hat->sfmmu_pgsz[0];
955 	pgsz1 = hat->sfmmu_pgsz[1];
956 	ASSERT(pgsz0 < mmu_page_sizes);
957 	ASSERT(pgsz1 < mmu_page_sizes);
958 	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
959 	if (hat->sfmmu_cext != new_cext) {
960 		hat->sfmmu_cext = new_cext;
961 	}
962 	ctx_pgsz_array[hat->sfmmu_cnum] = hat->sfmmu_cext;
963 	/*
964 	 * sfmmu_setctx_sec() will take care of the
965 	 * rest of the dirty work for us.
966 	 */
967 }
968 
969 /*
970  * Return processor specific async error structure
971  * size used.
972  */
973 int
974 cpu_aflt_size(void)
975 {
976 	return (sizeof (opl_async_flt_t));
977 }
978 
/*
 * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
 * post-process CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
 * Historically this entry point was used to log the actual cmn_err(9F) text;
 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
 * With FMA this function now also returns a flag which indicates to the
 * caller whether the ereport should be posted (1) or suppressed (0).
 */
/*ARGSUSED*/
static int
cpu_sync_log_err(void *flt)
{
	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;

	/*
	 * No extra processing of urgent error events.
	 * Always generate ereports for these events.
	 */
	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
		return (1);

	/*
	 * Additional processing for synchronous errors.
	 */
	switch (opl_flt->flt_type) {
	case OPL_CPU_INV_SFSR:
		/* Invalid SFSR contents: nothing to decode, just report. */
		return (1);

	case OPL_CPU_SYNC_UE:
		/*
		 * The validity: SFSR_MK_UE bit has been checked
		 * in opl_cpu_sync_error()
		 * No more check is required.
		 *
		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
		 * and they have been retrieved in cpu_queue_events()
		 */

		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
			ASSERT(aflt->flt_in_memory);
			/*
			 * We want to skip logging only if ALL the following
			 * conditions are true:
			 *
			 *	1. We are not panicking already.
			 *	2. The error is a memory error.
			 *	3. There is only one error.
			 *	4. The error is on a retired page.
			 *	5. The error occurred under on_trap
			 *	protection AFLT_PROT_EC
			 */
			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
			    page_retire_check(aflt->flt_addr, NULL) == 0) {
				/*
				 * Do not log an error from
				 * the retired page
				 */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				return (0);
			}
			if (!panicstr)
				cpu_page_retire(opl_flt);
		}
		return (1);

	case OPL_CPU_SYNC_OTHERS:
		/*
		 * For the following error cases, the processor HW does
		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
		 * to assign appropriate values here to reflect what we
		 * think is the most likely cause of the problem w.r.t to
		 * the particular error event.  For Buserr and timeout
		 * error event, we will assign OPL_ERRID_CHANNEL as the
		 * most likely reason.  For TLB parity or multiple hit
		 * error events, we will assign the reason as
		 * OPL_ERRID_CPU (cpu related problem) and set the
		 * flt_eid_sid to point to the cpuid.
		 */

		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
			/*
			 * flt_eid_sid will not be used for this case.
			 */
			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
		}
		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
			    opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			    opl_flt->flt_eid_sid = aflt->flt_inst;
		}

		/*
		 * In case of no effective error bit
		 */
		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
			    opl_flt->flt_eid_mod = OPL_ERRID_CPU;
			    opl_flt->flt_eid_sid = aflt->flt_inst;
		}
		break;

		default:
			return (1);
	}
	return (1);
}
1086 
1087 /*
1088  * Retire the bad page that may contain the flushed error.
1089  */
1090 void
1091 cpu_page_retire(opl_async_flt_t *opl_flt)
1092 {
1093 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1094 	(void) page_retire(aflt->flt_addr, PR_UE);
1095 }
1096 
1097 /*
1098  * Invoked by error_init() early in startup and therefore before
1099  * startup_errorq() is called to drain any error Q -
1100  *
1101  * startup()
1102  *   startup_end()
1103  *     error_init()
1104  *       cpu_error_init()
1105  * errorq_init()
1106  *   errorq_drain()
1107  * start_other_cpus()
1108  *
1109  * The purpose of this routine is to create error-related taskqs.  Taskqs
1110  * are used for this purpose because cpu_lock can't be grabbed from interrupt
1111  * context.
1112  *
1113  */
1114 /*ARGSUSED*/
1115 void
1116 cpu_error_init(int items)
1117 {
1118 	opl_err_log = (opl_errlog_t *)
1119 	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
1120 	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
1121 		cmn_err(CE_PANIC, "The base address of the error log "
1122 		    "is not page aligned");
1123 }
1124 
1125 /*
1126  * We route all errors through a single switch statement.
1127  */
1128 void
1129 cpu_ue_log_err(struct async_flt *aflt)
1130 {
1131 	switch (aflt->flt_class) {
1132 	case CPU_FAULT:
1133 		if (cpu_sync_log_err(aflt))
1134 			cpu_ereport_post(aflt);
1135 		break;
1136 
1137 	case BUS_FAULT:
1138 		bus_async_log_err(aflt);
1139 		break;
1140 
1141 	default:
1142 		cmn_err(CE_WARN, "discarding async error %p with invalid "
1143 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1144 		return;
1145 	}
1146 }
1147 
1148 /*
1149  * Routine for panic hook callback from panic_idle().
1150  *
1151  * Nothing to do here.
1152  */
1153 void
1154 cpu_async_panic_callb(void)
1155 {
1156 }
1157 
1158 /*
1159  * Routine to return a string identifying the physical name
1160  * associated with a memory/cache error.
1161  */
1162 /*ARGSUSED*/
1163 int
1164 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
1165     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
1166     ushort_t flt_status, char *buf, int buflen, int *lenp)
1167 {
1168 	int synd_code;
1169 	int ret;
1170 
1171 	/*
1172 	 * An AFSR of -1 defaults to a memory syndrome.
1173 	 */
1174 	synd_code = (int)flt_synd;
1175 
1176 	if (&plat_get_mem_unum) {
1177 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
1178 			flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
1179 			buf[0] = '\0';
1180 			*lenp = 0;
1181 		}
1182 		return (ret);
1183 	}
1184 	buf[0] = '\0';
1185 	*lenp = 0;
1186 	return (ENOTSUP);
1187 }
1188 
1189 /*
1190  * Wrapper for cpu_get_mem_unum() routine that takes an
1191  * async_flt struct rather than explicit arguments.
1192  */
1193 int
1194 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1195     char *buf, int buflen, int *lenp)
1196 {
1197 	/*
1198 	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
1199 	 * memory error.
1200 	 */
1201 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
1202 	    (uint64_t)-1,
1203 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
1204 	    aflt->flt_status, buf, buflen, lenp));
1205 }
1206 
1207 /*
1208  * This routine is a more generic interface to cpu_get_mem_unum()
1209  * that may be used by other modules (e.g. mm).
1210  */
1211 /*ARGSUSED*/
1212 int
1213 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1214     char *buf, int buflen, int *lenp)
1215 {
1216 	int synd_status, flt_in_memory, ret;
1217 	ushort_t flt_status = 0;
1218 	char unum[UNUM_NAMLEN];
1219 
1220 	/*
1221 	 * Check for an invalid address.
1222 	 */
1223 	if (afar == (uint64_t)-1)
1224 		return (ENXIO);
1225 
1226 	if (synd == (uint64_t)-1)
1227 		synd_status = AFLT_STAT_INVALID;
1228 	else
1229 		synd_status = AFLT_STAT_VALID;
1230 
1231 	flt_in_memory = (*afsr & SFSR_MEMORY) &&
1232 		pf_is_memory(afar >> MMU_PAGESHIFT);
1233 
1234 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1235 		CPU->cpu_id, flt_in_memory, flt_status, unum,
1236 		UNUM_NAMLEN, lenp);
1237 	if (ret != 0)
1238 		return (ret);
1239 
1240 	if (*lenp >= buflen)
1241 		return (ENAMETOOLONG);
1242 
1243 	(void) strncpy(buf, unum, buflen);
1244 
1245 	return (0);
1246 }
1247 
1248 /*
1249  * Routine to return memory information associated
1250  * with a physical address and syndrome.
1251  */
1252 /*ARGSUSED*/
1253 int
1254 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1255     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1256     int *segsp, int *banksp, int *mcidp)
1257 {
1258 	int synd_code = (int)synd;
1259 
1260 	if (afar == (uint64_t)-1)
1261 		return (ENXIO);
1262 
1263 	if (p2get_mem_info != NULL)
1264 		return ((p2get_mem_info)(synd_code, afar,
1265 			mem_sizep, seg_sizep, bank_sizep,
1266 			segsp, banksp, mcidp));
1267 	else
1268 		return (ENOTSUP);
1269 }
1270 
1271 /*
1272  * Routine to return a string identifying the physical
1273  * name associated with a cpuid.
1274  */
1275 int
1276 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1277 {
1278 	int ret;
1279 	char unum[UNUM_NAMLEN];
1280 
1281 	if (&plat_get_cpu_unum) {
1282 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
1283 			!= 0)
1284 			return (ret);
1285 	} else {
1286 		return (ENOTSUP);
1287 	}
1288 
1289 	if (*lenp >= buflen)
1290 		return (ENAMETOOLONG);
1291 
1292 	(void) strncpy(buf, unum, *lenp);
1293 
1294 	return (0);
1295 }
1296 
1297 /*
1298  * This routine exports the name buffer size.
1299  */
1300 size_t
1301 cpu_get_name_bufsize()
1302 {
1303 	return (UNUM_NAMLEN);
1304 }
1305 
1306 /*
1307  * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
1308  */
1309 void
1310 cpu_flush_ecache(void)
1311 {
1312 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
1313 	    cpunodes[CPU->cpu_id].ecache_linesize);
1314 }
1315 
1316 static uint8_t
1317 flt_to_trap_type(struct async_flt *aflt)
1318 {
1319 	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
1320 		return (TRAP_TYPE_ECC_I);
1321 	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
1322 		return (TRAP_TYPE_ECC_D);
1323 	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
1324 		return (TRAP_TYPE_URGENT);
1325 	return (-1);
1326 }
1327 
1328 /*
1329  * Encode the data saved in the opl_async_flt_t struct into
1330  * the FM ereport payload.
1331  */
1332 /* ARGSUSED */
1333 static void
1334 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
1335 		nvlist_t *resource)
1336 {
1337 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
1338 	char unum[UNUM_NAMLEN];
1339 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1340 	int len;
1341 
1342 
1343 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
1344 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
1345 			DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1346 	}
1347 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
1348 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
1349 			DATA_TYPE_UINT64, aflt->flt_addr, NULL);
1350 	}
1351 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
1352 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
1353 			DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1354 	}
1355 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
1356 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
1357 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
1358 	}
1359 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
1360 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
1361 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
1362 	}
1363 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
1364 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
1365 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
1366 	}
1367 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
1368 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
1369 		    DATA_TYPE_BOOLEAN_VALUE,
1370 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
1371 	}
1372 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
1373 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
1374 			DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
1375 	}
1376 
1377 	switch (opl_flt->flt_eid_mod) {
1378 	case OPL_ERRID_CPU:
1379 		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1380 			(u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
1381 		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
1382 			NULL, opl_flt->flt_eid_sid,
1383 			(uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version,
1384 			sbuf);
1385 		fm_payload_set(payload,
1386 			FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1387 			DATA_TYPE_NVLIST, resource, NULL);
1388 		break;
1389 
1390 	case OPL_ERRID_CHANNEL:
1391 		/*
1392 		 * No resource is created but the cpumem DE will find
1393 		 * the defective path by retreiving EID from SFSR which is
1394 		 * included in the payload.
1395 		 */
1396 		break;
1397 
1398 	case OPL_ERRID_MEM:
1399 		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
1400 		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
1401 			NULL, unum, NULL, (uint64_t)-1);
1402 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1403 			DATA_TYPE_NVLIST, resource, NULL);
1404 		break;
1405 
1406 	case OPL_ERRID_PATH:
1407 		/*
1408 		 * No resource is created but the cpumem DE will find
1409 		 * the defective path by retreiving EID from SFSR which is
1410 		 * included in the payload.
1411 		 */
1412 		break;
1413 	}
1414 }
1415 
1416 /*
1417  * Returns whether fault address is valid for this error bit and
1418  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
1419  */
1420 /*ARGSUSED*/
1421 static int
1422 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
1423 {
1424 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1425 
1426 	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
1427 		return ((t_afsr_bit & SFSR_MEMORY) &&
1428 		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
1429 	}
1430 	return (0);
1431 }
1432 
1433 /*
1434  * In OPL SCF does the stick synchronization.
1435  */
1436 void
1437 sticksync_slave(void)
1438 {
1439 }
1440 
1441 /*
1442  * In OPL SCF does the stick synchronization.
1443  */
1444 void
1445 sticksync_master(void)
1446 {
1447 }
1448 
1449 /*
1450  * Cpu private unitialization.  OPL cpus do not use the private area.
1451  */
1452 void
1453 cpu_uninit_private(struct cpu *cp)
1454 {
1455 	cmp_delete_cpu(cp->cpu_id);
1456 }
1457 
1458 /*
1459  * Always flush an entire cache.
1460  */
1461 void
1462 cpu_error_ecache_flush(void)
1463 {
1464 	cpu_flush_ecache();
1465 }
1466 
1467 void
1468 cpu_ereport_post(struct async_flt *aflt)
1469 {
1470 	char *cpu_type, buf[FM_MAX_CLASS];
1471 	nv_alloc_t *nva = NULL;
1472 	nvlist_t *ereport, *detector, *resource;
1473 	errorq_elem_t *eqep;
1474 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1475 
1476 	if (aflt->flt_panic || panicstr) {
1477 		eqep = errorq_reserve(ereport_errorq);
1478 		if (eqep == NULL)
1479 			return;
1480 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
1481 		nva = errorq_elem_nva(ereport_errorq, eqep);
1482 	} else {
1483 		ereport = fm_nvlist_create(nva);
1484 	}
1485 
1486 	/*
1487 	 * Create the scheme "cpu" FMRI.
1488 	 */
1489 	detector = fm_nvlist_create(nva);
1490 	resource = fm_nvlist_create(nva);
1491 	switch (cpunodes[aflt->flt_inst].implementation) {
1492 	case OLYMPUS_C_IMPL:
1493 		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
1494 		break;
1495 	default:
1496 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
1497 		break;
1498 	}
1499 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1500 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
1501 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
1502 	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
1503 	    sbuf);
1504 
1505 	/*
1506 	 * Encode all the common data into the ereport.
1507 	 */
1508 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
1509 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
1510 
1511 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
1512 	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
1513 
1514 	/*
1515 	 * Encode the error specific data that was saved in
1516 	 * the async_flt structure into the ereport.
1517 	 */
1518 	cpu_payload_add_aflt(aflt, ereport, resource);
1519 
1520 	if (aflt->flt_panic || panicstr) {
1521 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1522 	} else {
1523 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1524 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1525 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1526 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1527 	}
1528 }
1529 
1530 void
1531 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
1532 {
1533 	int status;
1534 	ddi_fm_error_t de;
1535 
1536 	bzero(&de, sizeof (ddi_fm_error_t));
1537 
1538 	de.fme_version = DDI_FME_VERSION;
1539 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
1540 	de.fme_flag = expected;
1541 	de.fme_bus_specific = (void *)aflt->flt_addr;
1542 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
1543 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
1544 		aflt->flt_panic = 1;
1545 }
1546 
1547 void
1548 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
1549     errorq_t *eqp, uint_t flag)
1550 {
1551 	struct async_flt *aflt = (struct async_flt *)payload;
1552 
1553 	aflt->flt_erpt_class = error_class;
1554 	errorq_dispatch(eqp, payload, payload_sz, flag);
1555 }
1556 
1557 void
1558 adjust_hw_copy_limits(int ecache_size)
1559 {
1560 	/*
1561 	 * Set hw copy limits.
1562 	 *
1563 	 * /etc/system will be parsed later and can override one or more
1564 	 * of these settings.
1565 	 *
1566 	 * At this time, ecache size seems only mildly relevant.
1567 	 * We seem to run into issues with the d-cache and stalls
1568 	 * we see on misses.
1569 	 *
1570 	 * Cycle measurement indicates that 2 byte aligned copies fare
1571 	 * little better than doing things with VIS at around 512 bytes.
1572 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
1573 	 * aligned is faster whenever the source and destination data
1574 	 * in cache and the total size is less than 2 Kbytes.  The 2K
1575 	 * limit seems to be driven by the 2K write cache.
1576 	 * When more than 2K of copies are done in non-VIS mode, stores
1577 	 * backup in the write cache.  In VIS mode, the write cache is
1578 	 * bypassed, allowing faster cache-line writes aligned on cache
1579 	 * boundaries.
1580 	 *
1581 	 * In addition, in non-VIS mode, there is no prefetching, so
1582 	 * for larger copies, the advantage of prefetching to avoid even
1583 	 * occasional cache misses is enough to justify using the VIS code.
1584 	 *
1585 	 * During testing, it was discovered that netbench ran 3% slower
1586 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
1587 	 * applications, data is only used once (copied to the output
1588 	 * buffer, then copied by the network device off the system).  Using
1589 	 * the VIS copy saves more L2 cache state.  Network copies are
1590 	 * around 1.3K to 1.5K in size for historical reasons.
1591 	 *
1592 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
1593 	 * aligned copy even for large caches and 8 MB ecache.  The
1594 	 * infrastructure to allow different limits for different sized
1595 	 * caches is kept to allow further tuning in later releases.
1596 	 */
1597 
1598 	if (min_ecache_size == 0 && use_hw_bcopy) {
1599 		/*
1600 		 * First time through - should be before /etc/system
1601 		 * is read.
1602 		 * Could skip the checks for zero but this lets us
1603 		 * preserve any debugger rewrites.
1604 		 */
1605 		if (hw_copy_limit_1 == 0) {
1606 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
1607 			priv_hcl_1 = hw_copy_limit_1;
1608 		}
1609 		if (hw_copy_limit_2 == 0) {
1610 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
1611 			priv_hcl_2 = hw_copy_limit_2;
1612 		}
1613 		if (hw_copy_limit_4 == 0) {
1614 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
1615 			priv_hcl_4 = hw_copy_limit_4;
1616 		}
1617 		if (hw_copy_limit_8 == 0) {
1618 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
1619 			priv_hcl_8 = hw_copy_limit_8;
1620 		}
1621 		min_ecache_size = ecache_size;
1622 	} else {
1623 		/*
1624 		 * MP initialization. Called *after* /etc/system has
1625 		 * been parsed. One CPU has already been initialized.
1626 		 * Need to cater for /etc/system having scragged one
1627 		 * of our values.
1628 		 */
1629 		if (ecache_size == min_ecache_size) {
1630 			/*
1631 			 * Same size ecache. We do nothing unless we
1632 			 * have a pessimistic ecache setting. In that
1633 			 * case we become more optimistic (if the cache is
1634 			 * large enough).
1635 			 */
1636 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
1637 				/*
1638 				 * Need to adjust hw_copy_limit* from our
1639 				 * pessimistic uniprocessor value to a more
1640 				 * optimistic UP value *iff* it hasn't been
1641 				 * reset.
1642 				 */
1643 				if ((ecache_size > 1048576) &&
1644 				    (priv_hcl_8 == hw_copy_limit_8)) {
1645 					if (ecache_size <= 2097152)
1646 						hw_copy_limit_8 = 4 *
1647 						    VIS_COPY_THRESHOLD;
1648 					else if (ecache_size <= 4194304)
1649 						hw_copy_limit_8 = 4 *
1650 						    VIS_COPY_THRESHOLD;
1651 					else
1652 						hw_copy_limit_8 = 4 *
1653 						    VIS_COPY_THRESHOLD;
1654 					priv_hcl_8 = hw_copy_limit_8;
1655 				}
1656 			}
1657 		} else if (ecache_size < min_ecache_size) {
1658 			/*
1659 			 * A different ecache size. Can this even happen?
1660 			 */
1661 			if (priv_hcl_8 == hw_copy_limit_8) {
1662 				/*
1663 				 * The previous value that we set
1664 				 * is unchanged (i.e., it hasn't been
1665 				 * scragged by /etc/system). Rewrite it.
1666 				 */
1667 				if (ecache_size <= 1048576)
1668 					hw_copy_limit_8 = 8 *
1669 					    VIS_COPY_THRESHOLD;
1670 				else if (ecache_size <= 2097152)
1671 					hw_copy_limit_8 = 8 *
1672 					    VIS_COPY_THRESHOLD;
1673 				else if (ecache_size <= 4194304)
1674 					hw_copy_limit_8 = 8 *
1675 					    VIS_COPY_THRESHOLD;
1676 				else
1677 					hw_copy_limit_8 = 10 *
1678 					    VIS_COPY_THRESHOLD;
1679 				priv_hcl_8 = hw_copy_limit_8;
1680 				min_ecache_size = ecache_size;
1681 			}
1682 		}
1683 	}
1684 }
1685 
1686 #define	VIS_BLOCKSIZE		64
1687 
1688 int
1689 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1690 {
1691 	int ret, watched;
1692 
1693 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1694 	ret = dtrace_blksuword32(addr, data, 0);
1695 	if (watched)
1696 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1697 
1698 	return (ret);
1699 }
1700 
1701 void
1702 opl_cpu_reg_init()
1703 {
1704 	uint64_t	this_cpu_log;
1705 
1706 	/*
1707 	 * We do not need to re-initialize cpu0 registers.
1708 	 */
1709 	if (cpu[getprocessorid()] == &cpu0)
1710 		return;
1711 
1712 	/*
1713 	 * Initialize Error log Scratch register for error handling.
1714 	 */
1715 
1716 	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
1717 		ERRLOG_BUFSZ * (getprocessorid())));
1718 	opl_error_setup(this_cpu_log);
1719 
1720 	/*
1721 	 * Enable MMU translating multiple page sizes for
1722 	 * sITLB and sDTLB.
1723 	 */
1724 	opl_mpg_enable();
1725 }
1726 
1727 /*
1728  * Queue one event in ue_queue based on ecc_type_to_info entry.
1729  */
1730 static void
1731 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
1732     ecc_type_to_info_t *eccp)
1733 {
1734 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1735 
1736 	if (reason &&
1737 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
1738 		(void) strcat(reason, eccp->ec_reason);
1739 	}
1740 
1741 	opl_flt->flt_bit = eccp->ec_afsr_bit;
1742 	opl_flt->flt_type = eccp->ec_flt_type;
1743 	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
1744 	aflt->flt_payload = eccp->ec_err_payload;
1745 
1746 	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
1747 	cpu_errorq_dispatch(eccp->ec_err_class,
1748 		(void *)opl_flt, sizeof (opl_async_flt_t),
1749 		ue_queue,
1750 		aflt->flt_panic);
1751 }
1752 
1753 /*
1754  * Queue events on async event queue one event per error bit.
1755  * Return number of events queued.
1756  */
1757 int
1758 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
1759 {
1760 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1761 	ecc_type_to_info_t *eccp;
1762 	int nevents = 0;
1763 
1764 	/*
1765 	 * Queue expected errors, error bit and fault type must must match
1766 	 * in the ecc_type_to_info table.
1767 	 */
1768 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
1769 		eccp++) {
1770 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
1771 		    (eccp->ec_flags & aflt->flt_status) != 0) {
1772 			/*
1773 			 * UE error event can be further
1774 			 * classified/breakdown into finer granularity
1775 			 * based on the flt_eid_mod value set by HW.  We do
1776 			 * special handling here so that we can report UE
1777 			 * error in finer granularity as ue_mem,
1778 			 * ue_channel, ue_cpu or ue_path.
1779 			 */
1780 			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
1781 				opl_flt->flt_eid_mod =
1782 					(aflt->flt_stat & SFSR_EID_MOD)
1783 					>> SFSR_EID_MOD_SHIFT;
1784 				opl_flt->flt_eid_sid =
1785 					(aflt->flt_stat & SFSR_EID_SID)
1786 					>> SFSR_EID_SID_SHIFT;
1787 				/*
1788 				 * Need to advance eccp pointer by flt_eid_mod
1789 				 * so that we get an appropriate ecc pointer
1790 				 *
1791 				 * EID			# of advances
1792 				 * ----------------------------------
1793 				 * OPL_ERRID_MEM	0
1794 				 * OPL_ERRID_CHANNEL	1
1795 				 * OPL_ERRID_CPU	2
1796 				 * OPL_ERRID_PATH	3
1797 				 */
1798 				eccp += opl_flt->flt_eid_mod;
1799 			}
1800 			cpu_queue_one_event(opl_flt, reason, eccp);
1801 			t_afsr_errs &= ~eccp->ec_afsr_bit;
1802 			nevents++;
1803 		}
1804 	}
1805 
1806 	return (nevents);
1807 }
1808 
1809 /*
1810  * Sync. error wrapper functions.
1811  * We use these functions in order to transfer here from the
1812  * nucleus trap handler information about trap type (data or
1813  * instruction) and trap level (0 or above 0). This way we
1814  * get rid of using SFSR's reserved bits.
1815  */
1816 
1817 #define	OPL_SYNC_TL0	0
1818 #define	OPL_SYNC_TL1	1
1819 #define	OPL_ISYNC_ERR	0
1820 #define	OPL_DSYNC_ERR	1
1821 
1822 void
1823 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1824 {
1825 	uint64_t t_sfar = p_sfar;
1826 	uint64_t t_sfsr = p_sfsr;
1827 
1828 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1829 	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
1830 }
1831 
1832 void
1833 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1834 {
1835 	uint64_t t_sfar = p_sfar;
1836 	uint64_t t_sfsr = p_sfsr;
1837 
1838 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1839 	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
1840 }
1841 
1842 void
1843 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1844 {
1845 	uint64_t t_sfar = p_sfar;
1846 	uint64_t t_sfsr = p_sfsr;
1847 
1848 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1849 	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
1850 }
1851 
1852 void
1853 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1854 {
1855 	uint64_t t_sfar = p_sfar;
1856 	uint64_t t_sfsr = p_sfsr;
1857 
1858 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1859 	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
1860 }
1861 
1862 /*
1863  * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
1864  * and TLB_PRT.
1865  * This function is designed based on cpu_deferred_error().
1866  */
1867 
1868 static void
1869 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
1870     uint_t tl, uint_t derr)
1871 {
1872 	opl_async_flt_t opl_flt;
1873 	struct async_flt *aflt;
1874 	int trampolined = 0;
1875 	char pr_reason[MAX_REASON_STRING];
1876 	uint64_t log_sfsr;
1877 	int expected = DDI_FM_ERR_UNEXPECTED;
1878 	ddi_acc_hdl_t *hp;
1879 
1880 	/*
1881 	 * We need to look at p_flag to determine if the thread detected an
1882 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1883 	 * because we just need a consistent snapshot and we know that everyone
1884 	 * else will store a consistent set of bits while holding p_lock.  We
1885 	 * don't have to worry about a race because SDOCORE is set once prior
1886 	 * to doing i/o from the process's address space and is never cleared.
1887 	 */
1888 	uint_t pflag = ttoproc(curthread)->p_flag;
1889 
1890 	pr_reason[0] = '\0';
1891 
1892 	/*
1893 	 * handle the specific error
1894 	 */
1895 	bzero(&opl_flt, sizeof (opl_async_flt_t));
1896 	aflt = (struct async_flt *)&opl_flt;
1897 	aflt->flt_id = gethrtime_waitfree();
1898 	aflt->flt_bus_id = getprocessorid();
1899 	aflt->flt_inst = CPU->cpu_id;
1900 	aflt->flt_stat = t_sfsr;
1901 	aflt->flt_addr = t_sfar;
1902 	aflt->flt_pc = (caddr_t)rp->r_pc;
1903 	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
1904 	aflt->flt_class = (uchar_t)CPU_FAULT;
1905 	aflt->flt_priv = (uchar_t)
1906 		(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?  1 : 0));
1907 	aflt->flt_tl = (uchar_t)tl;
1908 	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
1909 	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
1910 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1911 	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;
1912 
1913 	/*
1914 	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
1915 	 * So, clear all error bits to avoid mis-handling and force the system
1916 	 * panicked.
1917 	 * We skip all the procedures below down to the panic message call.
1918 	 */
1919 	if (!(t_sfsr & SFSR_FV)) {
1920 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
1921 		aflt->flt_panic = 1;
1922 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
1923 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
1924 			(void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
1925 			aflt->flt_panic);
1926 		fm_panic("%sErrors(s)", "invalid SFSR");
1927 	}
1928 
1929 	/*
1930 	 * If either UE and MK bit is off, this is not valid UE error.
1931 	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
1932 	 * mis-handling below.
1933 	 * aflt->flt_stat keeps the original bits as a reference.
1934 	 */
1935 	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
1936 	    (SFSR_MK_UE|SFSR_UE)) {
1937 		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
1938 	}
1939 
1940 	/*
1941 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1942 	 * see if we were executing in the kernel under on_trap() or t_lofault
1943 	 * protection.  If so, modify the saved registers so that we return
1944 	 * from the trap to the appropriate trampoline routine.
1945 	 */
1946 	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
1947 		if (curthread->t_ontrap != NULL) {
1948 			on_trap_data_t *otp = curthread->t_ontrap;
1949 
1950 			if (otp->ot_prot & OT_DATA_EC) {
1951 				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
1952 				otp->ot_trap |= (ushort_t)OT_DATA_EC;
1953 				rp->r_pc = otp->ot_trampoline;
1954 				rp->r_npc = rp->r_pc + 4;
1955 				trampolined = 1;
1956 			}
1957 
1958 			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
1959 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1960 				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
1961 				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
1962 				rp->r_pc = otp->ot_trampoline;
1963 				rp->r_npc = rp->r_pc + 4;
1964 				trampolined = 1;
1965 				/*
1966 				 * for peeks and caut_gets errors are expected
1967 				 */
1968 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1969 				if (!hp)
1970 					expected = DDI_FM_ERR_PEEK;
1971 				else if (hp->ah_acc.devacc_attr_access ==
1972 				    DDI_CAUTIOUS_ACC)
1973 					expected = DDI_FM_ERR_EXPECTED;
1974 			}
1975 
1976 		} else if (curthread->t_lofault) {
1977 			aflt->flt_prot = AFLT_PROT_COPY;
1978 			rp->r_g1 = EFAULT;
1979 			rp->r_pc = curthread->t_lofault;
1980 			rp->r_npc = rp->r_pc + 4;
1981 			trampolined = 1;
1982 		}
1983 	}
1984 
1985 	/*
1986 	 * If we're in user mode or we're doing a protected copy, we either
1987 	 * want the ASTON code below to send a signal to the user process
1988 	 * or we want to panic if aft_panic is set.
1989 	 *
1990 	 * If we're in privileged mode and we're not doing a copy, then we
1991 	 * need to check if we've trampolined.  If we haven't trampolined,
1992 	 * we should panic.
1993 	 */
1994 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1995 		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
1996 			aflt->flt_panic |= aft_panic;
1997 	} else if (!trampolined) {
1998 		aflt->flt_panic = 1;
1999 	}
2000 
2001 	/*
2002 	 * If we've trampolined due to a privileged TO or BERR, or if an
2003 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
2004 	 * event for that TO or BERR.  Queue all other events (if any) besides
2005 	 * the TO/BERR.
2006 	 */
2007 	log_sfsr = t_sfsr;
2008 	if (trampolined) {
2009 		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
2010 	} else if (!aflt->flt_priv) {
2011 		/*
2012 		 * User mode, suppress messages if
2013 		 * cpu_berr_to_verbose is not set.
2014 		 */
2015 		if (!cpu_berr_to_verbose)
2016 			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
2017 	}
2018 
2019 	if (((log_sfsr & SFSR_ERRS) &&
2020 		(cpu_queue_events(&opl_flt, pr_reason, t_sfsr) == 0)) ||
2021 	    ((t_sfsr & SFSR_ERRS) == 0)) {
2022 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
2023 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
2024 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
2025 			(void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
2026 			aflt->flt_panic);
2027 	}
2028 
2029 	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
2030 		cpu_run_bus_error_handlers(aflt, expected);
2031 	}
2032 
2033 	/*
2034 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2035 	 * be logged as part of the panic flow.
2036 	 */
2037 	if (aflt->flt_panic) {
2038 		if (pr_reason[0] == 0)
2039 			strcpy(pr_reason, "invalid SFSR ");
2040 
2041 		fm_panic("%sErrors(s)", pr_reason);
2042 	}
2043 
2044 	/*
2045 	 * If we queued an error and we are going to return from the trap and
2046 	 * the error was in user mode or inside of a copy routine, set AST flag
2047 	 * so the queue will be drained before returning to user mode.  The
2048 	 * AST processing will also act on our failure policy.
2049 	 */
2050 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2051 		int pcb_flag = 0;
2052 
2053 		if (t_sfsr & (SFSR_ERRS &
2054 			~(SFSR_BERR | SFSR_TO)))
2055 			pcb_flag |= ASYNC_HWERR;
2056 
2057 		if (t_sfsr & SFSR_BERR)
2058 			pcb_flag |= ASYNC_BERR;
2059 
2060 		if (t_sfsr & SFSR_TO)
2061 			pcb_flag |= ASYNC_BTO;
2062 
2063 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2064 		aston(curthread);
2065 	}
2066 }
2067 
/*
 * Handler for CPU "urgent" errors (UGESR-reported).  Builds an
 * opl_async_flt_t describing the fault, queues ereports for any
 * recognized UGESR error bits (or an invalid-UGESR ereport when none
 * are recognized), and then panics: urgent errors are always fatal
 * here — flt_panic is set unconditionally and the function ends in
 * fm_panic().
 *
 *   rp      - saved registers at the time of the trap
 *   p_ugesr - raw urgent-error status value
 *   tl      - trap level at which the error was taken
 */
/*ARGSUSED*/
void
opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	char pr_reason[MAX_REASON_STRING];

	/* normalize tl: collapse the trap level to 1 (TL >= 2) or 0 */
	tl = (tl >= 2 ? 1 : 0);
	pr_reason[0] = '\0';

	/* Populate the common async-fault structure for this event. */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = p_ugesr;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	aflt->flt_tl = tl;
	/*
	 * Privileged if taken at elevated trap level, or if the
	 * interrupted context was running with TSTATE.PRIV set.
	 */
	aflt->flt_priv = (uchar_t)
		(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?  1 : 0));
	aflt->flt_status = OPL_ECC_URGENT_TRAP;
	aflt->flt_panic = 1;	/* urgent errors are unconditionally fatal */
	/*
	 * HW does not set mod/sid in case of urgent error.
	 * So we have to set it here.
	 */
	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
	opl_flt.flt_eid_sid = aflt->flt_inst;

	/*
	 * If no events were queued (no error bits recognized in the
	 * UGESR value), report the raw status as an invalid-UGESR
	 * ereport so the event is not lost.
	 */
	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
		opl_flt.flt_type = OPL_CPU_INV_UGESR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG,
			(void *)&opl_flt, sizeof (opl_async_flt_t),
			ue_queue, aflt->flt_panic);
	}

	/* Queued ereports are drained as part of the panic flow. */
	fm_panic("Urgent Error");
}
2110 
2111 /*
2112  * Initialization error counters resetting.
2113  */
2114 /* ARGSUSED */
2115 static void
2116 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
2117 {
2118 	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
2119 	hdlr->cyh_level = CY_LOW_LEVEL;
2120 	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
2121 
2122 	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
2123 	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
2124 }
2125 
2126 void
2127 cpu_mp_init(void)
2128 {
2129 	cyc_omni_handler_t hdlr;
2130 
2131 	hdlr.cyo_online = opl_ras_online;
2132 	hdlr.cyo_offline = NULL;
2133 	hdlr.cyo_arg = NULL;
2134 	mutex_enter(&cpu_lock);
2135 	(void) cyclic_add_omni(&hdlr);
2136 	mutex_exit(&cpu_lock);
2137 }
2138 
/*
 * No-op on OPL: no extra kernel page-size initialization is performed
 * for this hat.  Hook provided for "unix" module compatibility.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
}
2144 
2145 size_t
2146 mmu_get_kernel_lpsize(size_t lpsize)
2147 {
2148 	uint_t tte;
2149 
2150 	if (lpsize == 0) {
2151 		/* no setting for segkmem_lpsize in /etc/system: use default */
2152 		return (MMU_PAGESIZE4M);
2153 	}
2154 
2155 	for (tte = TTE8K; tte <= TTE4M; tte++) {
2156 		if (lpsize == TTEBYTES(tte))
2157 			return (lpsize);
2158 	}
2159 
2160 	return (TTEBYTES(TTE8K));
2161 }
2162 
2163 /*
2164  * The following are functions that are unused in
2165  * OPL cpu module. They are defined here to resolve
2166  * dependencies in the "unix" module.
2167  * Unused functions that should never be called in
2168  * OPL are coded with ASSERT(0).
2169  */
2170 
/* Harmless no-op on OPL; kept to satisfy "unix" module dependencies. */
void
cpu_disable_errors(void)
{}
2174 
/* Never called on OPL; ASSERT(0) flags any unexpected call. */
void
cpu_enable_errors(void)
{ ASSERT(0); }
2178 
/* CE memory scrubbing is not used on OPL; must never be called. */
/*ARGSUSED*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
{ ASSERT(0); }
2183 
/* No-op on OPL: no per-CPU action on entering the faulted state. */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{}
2188 
/* No-op on OPL: no per-CPU action on leaving the faulted state. */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{}
2193 
/* No-op on OPL; kept to satisfy "unix" module dependencies. */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}
2198 
/* CE logging path is unused on OPL; must never be called. */
/*ARGSUSED*/
void
cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
{ ASSERT(0); }
2203 
/* CE checking is unused on OPL; must never be called. */
/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
{ ASSERT(0); }
2208 
/* CE per-unum counting is unused on OPL; must never be called. */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{ ASSERT(0); }
2213 
/* No-op on OPL: no E$ scrubber rate adjustment when the CPU is busy. */
/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{}
2218 
/* No-op on OPL: no E$ scrubber rate adjustment when the CPU is idle. */
/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{}
2223 
/* CPU speed change is unsupported on OPL; must never be called. */
/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{ ASSERT(0); }
2228 
/* No-op on OPL: no cache scrubber to initialize. */
void
cpu_init_cache_scrub(void)
{}
2232 
/* Memory serial-id lookup is not supported by the OPL cpu module. */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
2239 
/* unum/offset-to-address translation is not supported on OPL. */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	return (ENOTSUP);
}
2246 
/* Fault-address-to-offset translation is not supported on OPL. */
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	return (ENOTSUP);
}
2253 
/* Direct ITLB entry reads are unused on OPL; must never be called. */
/*ARGSUSED*/
void
itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }
2258 
/* Direct DTLB entry reads are unused on OPL; must never be called. */
/*ARGSUSED*/
void
dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
{ ASSERT(0); }
2263