xref: /illumos-gate/usr/src/uts/sun4u/cpu/opl_olympus.c (revision 843e19887f64dde75055cf8842fc4db2171eff45)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII).
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/ddi.h>
35 #include <sys/sysmacros.h>
36 #include <sys/archsystm.h>
37 #include <sys/vmsystm.h>
38 #include <sys/machparam.h>
39 #include <sys/machsystm.h>
40 #include <sys/machthread.h>
41 #include <sys/cpu.h>
42 #include <sys/cmp.h>
43 #include <sys/elf_SPARC.h>
44 #include <vm/vm_dep.h>
45 #include <vm/hat_sfmmu.h>
46 #include <vm/seg_kpm.h>
47 #include <vm/seg_kmem.h>
48 #include <sys/cpuvar.h>
49 #include <sys/opl_olympus_regs.h>
50 #include <sys/opl_module.h>
51 #include <sys/async.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/dditypes.h>
55 #include <sys/cpu_module.h>
56 #include <sys/sysmacros.h>
57 #include <sys/intreg.h>
58 #include <sys/clock.h>
59 #include <sys/platform_module.h>
60 #include <sys/ontrap.h>
61 #include <sys/panic.h>
62 #include <sys/memlist.h>
63 #include <sys/ndifm.h>
64 #include <sys/ddifm.h>
65 #include <sys/fm/protocol.h>
66 #include <sys/fm/util.h>
67 #include <sys/fm/cpu/SPARC64-VI.h>
68 #include <sys/dtrace.h>
69 #include <sys/watchpoint.h>
70 #include <sys/promif.h>
71 
72 /*
73  * Internal functions.
74  */
75 static int cpu_sync_log_err(void *flt);
76 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
77 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
78 static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
79 
80 /*
81  * Error counters resetting interval.
82  */
83 static int opl_async_check_interval = 60;		/* 1 min */
84 
85 uint_t cpu_impl_dual_pgsz = 1;
86 
87 /*
88  * PA[22:0] represent Displacement in Jupiter
89  * configuration space.
90  */
91 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
92 
93 /*
94  * set in /etc/system to control logging of user BERR/TO's
95  */
96 int cpu_berr_to_verbose = 0;
97 
98 /*
99  * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
100  */
101 int cpu_alljupiter = 0;
102 
103 static int min_ecache_size;
104 static uint_t priv_hcl_1;
105 static uint_t priv_hcl_2;
106 static uint_t priv_hcl_4;
107 static uint_t priv_hcl_8;
108 
109 /*
110  * Olympus error log
111  */
112 static opl_errlog_t	*opl_err_log;
113 
114 /*
115  * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
116  * No any other ecc_type_info insertion is allowed in between the following
117  * four UE classess.
118  */
119 ecc_type_to_info_t ecc_type_to_info[] = {
120 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
121 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
122 	FM_EREPORT_CPU_UE_MEM,
123 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
124 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
125 	FM_EREPORT_CPU_UE_CHANNEL,
126 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
127 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
128 	FM_EREPORT_CPU_UE_CPU,
129 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
130 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
131 	FM_EREPORT_CPU_UE_PATH,
132 	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
133 	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
134 	FM_EREPORT_CPU_BERR,
135 	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
136 	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
137 	FM_EREPORT_CPU_BTO,
138 	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
139 	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
140 	FM_EREPORT_CPU_MTLB,
141 	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
142 	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
143 	FM_EREPORT_CPU_TLBP,
144 
145 	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
146 	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
147 	FM_EREPORT_CPU_CRE,
148 	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
149 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
150 	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
151 	FM_EREPORT_CPU_TSBCTX,
152 	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
153 	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
154 	FM_EREPORT_CPU_TSBP,
155 	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
156 	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
157 	FM_EREPORT_CPU_PSTATE,
158 	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
159 	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
160 	FM_EREPORT_CPU_TSTATE,
161 	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
162 	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
163 	FM_EREPORT_CPU_IUG_F,
164 	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
165 	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
166 	FM_EREPORT_CPU_IUG_R,
167 	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
168 	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
169 	FM_EREPORT_CPU_SDC,
170 	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
171 	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
172 	FM_EREPORT_CPU_WDT,
173 	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
174 	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
175 	FM_EREPORT_CPU_DTLB,
176 	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
177 	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
178 	FM_EREPORT_CPU_ITLB,
179 	UGESR_IUG_COREERR, "IUG_COREERR",
180 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
181 	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
182 	FM_EREPORT_CPU_CORE,
183 	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
184 	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
185 	FM_EREPORT_CPU_DAE,
186 	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
187 	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
188 	FM_EREPORT_CPU_IAE,
189 	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
190 	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
191 	FM_EREPORT_CPU_UGE,
192 	0,		NULL,		0,		0,
193 	NULL,  0,	   0,
194 };
195 
196 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
197 		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
198 		int *segsp, int *banksp, int *mcidp);
199 
200 
201 /*
202  * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
203  */
204 void
205 cpu_init_trap(void)
206 {
207 	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
208 	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
209 	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
210 	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
211 	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
212 	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
213 }
214 
215 static int
216 getintprop(pnode_t node, char *name, int deflt)
217 {
218 	int	value;
219 
220 	switch (prom_getproplen(node, name)) {
221 	case sizeof (int):
222 		(void) prom_getprop(node, name, (caddr_t)&value);
223 		break;
224 
225 	default:
226 		value = deflt;
227 		break;
228 	}
229 
230 	return (value);
231 }
232 
233 /*
234  * Set the magic constants of the implementation.
235  */
236 /*ARGSUSED*/
237 void
238 cpu_fiximp(pnode_t dnode)
239 {
240 	int i, a;
241 	extern int vac_size, vac_shift;
242 	extern uint_t vac_mask;
243 
244 	static struct {
245 		char	*name;
246 		int	*var;
247 		int	defval;
248 	} prop[] = {
249 		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
250 		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
251 		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
252 		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
253 		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
254 		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
255 		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
256 	};
257 
258 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
259 		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
260 
261 	ecache_setsize = ecache_size / ecache_associativity;
262 
263 	vac_size = OPL_VAC_SIZE;
264 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
265 	i = 0; a = vac_size;
266 	while (a >>= 1)
267 		++i;
268 	vac_shift = i;
269 	shm_alignment = vac_size;
270 	vac = 1;
271 }
272 
273 /*
274  * Enable features for Jupiter-only domains.
275  */
276 void
277 cpu_fix_alljupiter(void)
278 {
279 	cpu_alljupiter = 1;
280 
281 	/*
282 	 * Enable ima hwcap for Jupiter-only domains.  DR will prevent
283 	 * addition of Olympus-C to all-Jupiter domains to preserve ima
284 	 * hwcap semantics.
285 	 */
286 	cpu_hwcap_flags |= AV_SPARC_IMA;
287 }
288 
289 #ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
/*
 * Quick and dirty way to locally redefine, for OPL only,
 * the value of IDSR_BN_SETS to 31 instead of the standard
 * value of 32.  This works around a problem in REV_B of the
 * Olympus-C processor, which cannot handle more than 31
 * xcall broadcasts.
 */
297 #undef	IDSR_BN_SETS
298 #define	IDSR_BN_SETS    31
299 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
300 
301 void
302 send_mondo_set(cpuset_t set)
303 {
304 	int lo, busy, nack, shipped = 0;
305 	uint16_t i, cpuids[IDSR_BN_SETS];
306 	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
307 	uint64_t starttick, endtick, tick, lasttick;
308 #if (NCPU > IDSR_BN_SETS)
309 	int index = 0;
310 	int ncpuids = 0;
311 #endif
312 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
313 	int bn_sets = IDSR_BN_SETS;
314 	uint64_t ver;
315 
316 	ASSERT(NCPU > bn_sets);
317 #endif
318 
319 	ASSERT(!CPUSET_ISNULL(set));
320 	starttick = lasttick = gettick();
321 
322 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
323 	ver = ultra_getver();
324 	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
325 	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
326 		bn_sets = 1;
327 #endif
328 
329 #if (NCPU <= IDSR_BN_SETS)
330 	for (i = 0; i < NCPU; i++)
331 		if (CPU_IN_SET(set, i)) {
332 			shipit(i, shipped);
333 			nackmask |= IDSR_NACK_BIT(shipped);
334 			cpuids[shipped++] = i;
335 			CPUSET_DEL(set, i);
336 			if (CPUSET_ISNULL(set))
337 				break;
338 		}
339 	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
340 #else
341 	for (i = 0; i < NCPU; i++)
342 		if (CPU_IN_SET(set, i)) {
343 			ncpuids++;
344 
345 			/*
346 			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
347 			 * find we have shipped to more than (IDSR_BN_SETS)
348 			 * CPUs, set "index" to the highest numbered CPU in
349 			 * the set so we can ship to other CPUs a bit later on.
350 			 */
351 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
352 			if (shipped < bn_sets) {
353 #else
354 			if (shipped < IDSR_BN_SETS) {
355 #endif
356 				shipit(i, shipped);
357 				nackmask |= IDSR_NACK_BIT(shipped);
358 				cpuids[shipped++] = i;
359 				CPUSET_DEL(set, i);
360 				if (CPUSET_ISNULL(set))
361 					break;
362 			} else
363 				index = (int)i;
364 		}
365 
366 	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
367 #endif
368 
369 	busymask = IDSR_NACK_TO_BUSY(nackmask);
370 	busy = nack = 0;
371 	endtick = starttick + xc_tick_limit;
372 	for (;;) {
373 		idsr = getidsr();
374 #if (NCPU <= IDSR_BN_SETS)
375 		if (idsr == 0)
376 			break;
377 #else
378 		if (idsr == 0 && shipped == ncpuids)
379 			break;
380 #endif
381 		tick = gettick();
382 		/*
383 		 * If there is a big jump between the current tick
384 		 * count and lasttick, we have probably hit a break
385 		 * point.  Adjust endtick accordingly to avoid panic.
386 		 */
387 		if (tick > (lasttick + xc_tick_jump_limit))
388 			endtick += (tick - lasttick);
389 		lasttick = tick;
390 		if (tick > endtick) {
391 			if (panic_quiesce)
392 				return;
393 			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
394 			    "BUSY]\nIDSR 0x%" PRIx64 "  cpuids:",
395 			    nack, busy, idsr);
396 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
397 			for (i = 0; i < bn_sets; i++) {
398 #else
399 			for (i = 0; i < IDSR_BN_SETS; i++) {
400 #endif
401 				if (idsr & (IDSR_NACK_BIT(i) |
402 				    IDSR_BUSY_BIT(i))) {
403 					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
404 				}
405 			}
406 			cmn_err(CE_CONT, "\n");
407 			cmn_err(CE_PANIC, "send_mondo_set: timeout");
408 		}
409 		curnack = idsr & nackmask;
410 		curbusy = idsr & busymask;
411 
412 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
413 		/*
414 		 * Only proceed to send more xcalls if all the
415 		 * cpus in the previous IDSR_BN_SETS were completed.
416 		 */
417 		if (curbusy) {
418 			busy++;
419 			continue;
420 		}
421 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
422 
423 #if (NCPU > IDSR_BN_SETS)
424 		if (shipped < ncpuids) {
425 			uint64_t cpus_left;
426 			uint16_t next = (uint16_t)index;
427 
428 			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
429 			    busymask;
430 
431 			if (cpus_left) {
432 				do {
433 					/*
434 					 * Sequence through and ship to the
435 					 * remainder of the CPUs in the system
436 					 * (e.g. other than the first
437 					 * (IDSR_BN_SETS)) in reverse order.
438 					 */
439 					lo = lowbit(cpus_left) - 1;
440 					i = IDSR_BUSY_IDX(lo);
441 					shipit(next, i);
442 					shipped++;
443 					cpuids[i] = next;
444 
445 					/*
446 					 * If we've processed all the CPUs,
447 					 * exit the loop now and save
448 					 * instructions.
449 					 */
450 					if (shipped == ncpuids)
451 						break;
452 
453 					for ((index = ((int)next - 1));
454 					    index >= 0; index--)
455 						if (CPU_IN_SET(set, index)) {
456 							next = (uint16_t)index;
457 							break;
458 						}
459 
460 					cpus_left &= ~(1ull << lo);
461 				} while (cpus_left);
462 				continue;
463 			}
464 		}
465 #endif
466 #ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
467 		if (curbusy) {
468 			busy++;
469 			continue;
470 		}
471 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
472 #ifdef SEND_MONDO_STATS
473 		{
474 			int n = gettick() - starttick;
475 			if (n < 8192)
476 				x_nack_stimes[n >> 7]++;
477 		}
478 #endif
479 		while (gettick() < (tick + sys_clock_mhz))
480 			;
481 		do {
482 			lo = lowbit(curnack) - 1;
483 			i = IDSR_NACK_IDX(lo);
484 			shipit(cpuids[i], i);
485 			curnack &= ~(1ull << lo);
486 		} while (curnack);
487 		nack++;
488 		busy = 0;
489 	}
490 #ifdef SEND_MONDO_STATS
491 	{
492 		int n = gettick() - starttick;
493 		if (n < 8192)
494 			x_set_stimes[n >> 7]++;
495 		else
496 			x_set_ltimes[(n >> 13) & 0xf]++;
497 	}
498 	x_set_cpus[shipped]++;
499 #endif
500 }
501 
502 /*
503  * Cpu private initialization.
504  */
505 void
506 cpu_init_private(struct cpu *cp)
507 {
508 	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
509 	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
510 		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
511 		    "supported", cp->cpu_id,
512 		    cpunodes[cp->cpu_id].implementation);
513 	}
514 
515 	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
516 }
517 
518 void
519 cpu_setup(void)
520 {
521 	extern int at_flags;
522 	extern int cpc_has_overflow_intr;
523 	uint64_t cpu0_log;
524 	extern	 uint64_t opl_cpu0_err_log;
525 
526 	/*
527 	 * Initialize Error log Scratch register for error handling.
528 	 */
529 
530 	cpu0_log = va_to_pa(&opl_cpu0_err_log);
531 	opl_error_setup(cpu0_log);
532 
533 	/*
534 	 * Enable MMU translating multiple page sizes for
535 	 * sITLB and sDTLB.
536 	 */
537 	opl_mpg_enable();
538 
539 	/*
540 	 * Setup chip-specific trap handlers.
541 	 */
542 	cpu_init_trap();
543 
544 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
545 
546 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
547 
548 	/*
549 	 * Due to the number of entries in the fully-associative tlb
550 	 * this may have to be tuned lower than in spitfire.
551 	 */
552 	pp_slots = MIN(8, MAXPP_SLOTS);
553 
554 	/*
555 	 * Block stores do not invalidate all pages of the d$, pagecopy
556 	 * et. al. need virtual translations with virtual coloring taken
557 	 * into consideration.  prefetch/ldd will pollute the d$ on the
558 	 * load side.
559 	 */
560 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
561 
562 	if (use_page_coloring) {
563 		do_pg_coloring = 1;
564 	}
565 
566 	isa_list =
567 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
568 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
569 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
570 
571 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
572 	    AV_SPARC_POPC | AV_SPARC_FMAF;
573 
574 	/*
575 	 * On SPARC64-VI, there's no hole in the virtual address space
576 	 */
577 	hole_start = hole_end = 0;
578 
579 	/*
580 	 * The kpm mapping window.
581 	 * kpm_size:
582 	 *	The size of a single kpm range.
583 	 *	The overall size will be: kpm_size * vac_colors.
584 	 * kpm_vbase:
585 	 *	The virtual start address of the kpm range within the kernel
586 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
587 	 */
588 	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
589 	kpm_size_shift = 47;
590 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
591 	kpm_smallpages = 1;
592 
593 	/*
594 	 * The traptrace code uses either %tick or %stick for
595 	 * timestamping.  We have %stick so we can use it.
596 	 */
597 	traptrace_use_stick = 1;
598 
599 	/*
600 	 * SPARC64-VI has a performance counter overflow interrupt
601 	 */
602 	cpc_has_overflow_intr = 1;
603 
604 	/*
605 	 * Declare that this architecture/cpu combination does not support
606 	 * fpRAS.
607 	 */
608 	fpras_implemented = 0;
609 }
610 
611 /*
612  * Called by setcpudelay
613  */
614 void
615 cpu_init_tick_freq(void)
616 {
617 	/*
618 	 * For SPARC64-VI we want to use the system clock rate as
619 	 * the basis for low level timing, due to support of mixed
620 	 * speed CPUs and power managment.
621 	 */
622 	if (system_clock_freq == 0)
623 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
624 
625 	sys_tick_freq = system_clock_freq;
626 }
627 
628 #ifdef SEND_MONDO_STATS
629 uint32_t x_one_stimes[64];
630 uint32_t x_one_ltimes[16];
631 uint32_t x_set_stimes[64];
632 uint32_t x_set_ltimes[16];
633 uint32_t x_set_cpus[NCPU];
634 uint32_t x_nack_stimes[64];
635 #endif
636 
637 /*
638  * Note: A version of this function is used by the debugger via the KDI,
639  * and must be kept in sync with this version.  Any changes made to this
640  * function to support new chips or to accomodate errata must also be included
641  * in the KDI-specific version.  See us3_kdi.c.
642  */
643 void
644 send_one_mondo(int cpuid)
645 {
646 	int busy, nack;
647 	uint64_t idsr, starttick, endtick, tick, lasttick;
648 	uint64_t busymask;
649 
650 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
651 	starttick = lasttick = gettick();
652 	shipit(cpuid, 0);
653 	endtick = starttick + xc_tick_limit;
654 	busy = nack = 0;
655 	busymask = IDSR_BUSY;
656 	for (;;) {
657 		idsr = getidsr();
658 		if (idsr == 0)
659 			break;
660 
661 		tick = gettick();
662 		/*
663 		 * If there is a big jump between the current tick
664 		 * count and lasttick, we have probably hit a break
665 		 * point.  Adjust endtick accordingly to avoid panic.
666 		 */
667 		if (tick > (lasttick + xc_tick_jump_limit))
668 			endtick += (tick - lasttick);
669 		lasttick = tick;
670 		if (tick > endtick) {
671 			if (panic_quiesce)
672 				return;
673 			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
674 			    "[%d NACK %d BUSY]", cpuid, nack, busy);
675 		}
676 
677 		if (idsr & busymask) {
678 			busy++;
679 			continue;
680 		}
681 		drv_usecwait(1);
682 		shipit(cpuid, 0);
683 		nack++;
684 		busy = 0;
685 	}
686 #ifdef SEND_MONDO_STATS
687 	{
688 		int n = gettick() - starttick;
689 		if (n < 8192)
690 			x_one_stimes[n >> 7]++;
691 		else
692 			x_one_ltimes[(n >> 13) & 0xf]++;
693 	}
694 #endif
695 }
696 
697 /*
698  * init_mmu_page_sizes is set to one after the bootup time initialization
699  * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
700  * valid value.
701  *
702  * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
703  * versions of disable_ism_large_pages and disable_large_pages, and feed back
704  * into those two hat variables at hat initialization time.
705  *
706  */
707 int init_mmu_page_sizes = 0;
708 
709 static uint_t mmu_disable_large_pages = 0;
710 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
711 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
712 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
713 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
714 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
715 	(1 << TTE512K));
716 
717 /*
718  * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
719  * Called during very early bootup from check_cpus_set().
720  * Can be called to verify that mmu_page_sizes are set up correctly.
721  *
722  * Set Olympus defaults. We do not use the function parameter.
723  */
724 /*ARGSUSED*/
725 int
726 mmu_init_mmu_page_sizes(int32_t not_used)
727 {
728 	if (!init_mmu_page_sizes) {
729 		mmu_page_sizes = MMU_PAGE_SIZES;
730 		mmu_hashcnt = MAX_HASHCNT;
731 		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
732 		mmu_exported_pagesize_mask = (1 << TTE8K) |
733 		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
734 		    (1 << TTE32M) | (1 << TTE256M);
735 		init_mmu_page_sizes = 1;
736 		return (0);
737 	}
738 	return (1);
739 }
740 
741 /* SPARC64-VI worst case DTLB parameters */
742 #ifndef	LOCKED_DTLB_ENTRIES
743 #define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
744 #endif
745 #define	TOTAL_DTLB_ENTRIES	32
746 #define	AVAIL_32M_ENTRIES	0
747 #define	AVAIL_256M_ENTRIES	0
748 #define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
749 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
750 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
751 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
752 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
753 
754 /*
755  * The function returns the mmu-specific values for the
756  * hat's disable_large_pages, disable_ism_large_pages, and
757  * disable_auto_data_large_pages and
758  * disable_text_data_large_pages variables.
759  */
760 uint_t
761 mmu_large_pages_disabled(uint_t flag)
762 {
763 	uint_t pages_disable = 0;
764 	extern int use_text_pgsz64K;
765 	extern int use_text_pgsz512K;
766 
767 	if (flag == HAT_LOAD) {
768 		pages_disable =  mmu_disable_large_pages;
769 	} else if (flag == HAT_LOAD_SHARE) {
770 		pages_disable = mmu_disable_ism_large_pages;
771 	} else if (flag == HAT_AUTO_DATA) {
772 		pages_disable = mmu_disable_auto_data_large_pages;
773 	} else if (flag == HAT_AUTO_TEXT) {
774 		pages_disable = mmu_disable_auto_text_large_pages;
775 		if (use_text_pgsz512K) {
776 			pages_disable &= ~(1 << TTE512K);
777 		}
778 		if (use_text_pgsz64K) {
779 			pages_disable &= ~(1 << TTE64K);
780 		}
781 	}
782 	return (pages_disable);
783 }
784 
785 /*
786  * mmu_init_large_pages is called with the desired ism_pagesize parameter.
787  * It may be called from set_platform_defaults, if some value other than 32M
788  * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
789  * then only warn, since it would be bad form to panic due to a user typo.
790  *
791  * The function re-initializes the mmu_disable_ism_large_pages variable.
792  */
793 void
794 mmu_init_large_pages(size_t ism_pagesize)
795 {
796 	switch (ism_pagesize) {
797 	case MMU_PAGESIZE4M:
798 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
799 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
800 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
801 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
802 		break;
803 	case MMU_PAGESIZE32M:
804 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
805 		    (1 << TTE512K) | (1 << TTE256M));
806 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
807 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
808 		adjust_data_maxlpsize(ism_pagesize);
809 		break;
810 	case MMU_PAGESIZE256M:
811 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
812 		    (1 << TTE512K) | (1 << TTE32M));
813 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
814 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
815 		adjust_data_maxlpsize(ism_pagesize);
816 		break;
817 	default:
818 		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
819 		    ism_pagesize);
820 		break;
821 	}
822 }
823 
824 /*
825  * Function to reprogram the TLBs when page sizes used
826  * by a process change significantly.
827  */
828 void
829 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
830 {
831 	uint8_t pgsz0, pgsz1;
832 
833 	/*
834 	 * Don't program 2nd dtlb for kernel and ism hat
835 	 */
836 	ASSERT(hat->sfmmu_ismhat == NULL);
837 	ASSERT(hat != ksfmmup);
838 
839 	/*
840 	 * hat->sfmmu_pgsz[] is an array whose elements
841 	 * contain a sorted order of page sizes.  Element
842 	 * 0 is the most commonly used page size, followed
843 	 * by element 1, and so on.
844 	 *
845 	 * ttecnt[] is an array of per-page-size page counts
846 	 * mapped into the process.
847 	 *
848 	 * If the HAT's choice for page sizes is unsuitable,
849 	 * we can override it here.  The new values written
850 	 * to the array will be handed back to us later to
851 	 * do the actual programming of the TLB hardware.
852 	 *
853 	 */
854 	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
855 	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);
856 
857 	/*
858 	 * This implements PAGESIZE programming of the sTLB
859 	 * if large TTE counts don't exceed the thresholds.
860 	 */
861 	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
862 		pgsz0 = page_szc(MMU_PAGESIZE);
863 	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
864 		pgsz1 = page_szc(MMU_PAGESIZE);
865 	tmp_pgsz[0] = pgsz0;
866 	tmp_pgsz[1] = pgsz1;
867 	/* otherwise, accept what the HAT chose for us */
868 }
869 
870 /*
871  * The HAT calls this function when an MMU context is allocated so that we
872  * can reprogram the large TLBs appropriately for the new process using
873  * the context.
874  *
875  * The caller must hold the HAT lock.
876  */
877 void
878 mmu_set_ctx_page_sizes(struct hat *hat)
879 {
880 	uint8_t pgsz0, pgsz1;
881 	uint8_t new_cext;
882 
883 	ASSERT(sfmmu_hat_lock_held(hat));
884 	/*
885 	 * Don't program 2nd dtlb for kernel and ism hat
886 	 */
887 	if (hat->sfmmu_ismhat || hat == ksfmmup)
888 		return;
889 
890 	/*
891 	 * If supported, reprogram the TLBs to a larger pagesize.
892 	 */
893 	pgsz0 = hat->sfmmu_pgsz[0];
894 	pgsz1 = hat->sfmmu_pgsz[1];
895 	ASSERT(pgsz0 < mmu_page_sizes);
896 	ASSERT(pgsz1 < mmu_page_sizes);
897 	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
898 	if (hat->sfmmu_cext != new_cext) {
899 #ifdef DEBUG
900 		int i;
901 		/*
902 		 * assert cnum should be invalid, this is because pagesize
903 		 * can only be changed after a proc's ctxs are invalidated.
904 		 */
905 		for (i = 0; i < max_mmu_ctxdoms; i++) {
906 			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
907 		}
908 #endif /* DEBUG */
909 		hat->sfmmu_cext = new_cext;
910 	}
911 	/*
912 	 * sfmmu_setctx_sec() will take care of the
913 	 * rest of the dirty work for us.
914 	 */
915 }
916 
917 /*
918  * This function assumes that there are either four or six supported page
919  * sizes and at most two programmable TLBs, so we need to decide which
920  * page sizes are most important and then adjust the TLB page sizes
921  * accordingly (if supported).
922  *
923  * If these assumptions change, this function will need to be
924  * updated to support whatever the new limits are.
925  */
926 void
927 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
928 {
929 	uint64_t sortcnt[MMU_PAGE_SIZES];
930 	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
931 	uint8_t i, j, max;
932 	uint16_t oldval, newval;
933 
934 	/*
935 	 * We only consider reprogramming the TLBs if one or more of
936 	 * the two most used page sizes changes and we're using
937 	 * large pages in this process.
938 	 */
939 	if (SFMMU_LGPGS_INUSE(sfmmup)) {
940 		/* Sort page sizes. */
941 		for (i = 0; i < mmu_page_sizes; i++) {
942 			sortcnt[i] = ttecnt[i];
943 		}
944 		for (j = 0; j < mmu_page_sizes; j++) {
945 			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
946 				if (sortcnt[i] > sortcnt[max])
947 					max = i;
948 			}
949 			tmp_pgsz[j] = max;
950 			sortcnt[max] = 0;
951 		}
952 
953 		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];
954 
955 		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);
956 
957 		/* Check 2 largest values after the sort. */
958 		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
959 		if (newval != oldval) {
960 			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
961 		}
962 	}
963 }
964 
965 /*
966  * Return processor specific async error structure
967  * size used.
968  */
969 int
970 cpu_aflt_size(void)
971 {
972 	return (sizeof (opl_async_flt_t));
973 }
974 
975 /*
976  * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
977  * post-process CPU events that are dequeued.  As such, it can be invoked
978  * from softint context, from AST processing in the trap() flow, or from the
979  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
980  * Historically this entry point was used to log the actual cmn_err(9F) text;
981  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
982  * With FMA this function now also returns a flag which indicates to the
983  * caller whether the ereport should be posted (1) or suppressed (0).
984  */
985 /*ARGSUSED*/
986 static int
987 cpu_sync_log_err(void *flt)
988 {
989 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
990 	struct async_flt *aflt = (struct async_flt *)flt;
991 
992 	/*
993 	 * No extra processing of urgent error events.
994 	 * Always generate ereports for these events.
995 	 */
996 	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
997 		return (1);
998 
999 	/*
1000 	 * Additional processing for synchronous errors.
1001 	 */
1002 	switch (opl_flt->flt_type) {
1003 	case OPL_CPU_INV_SFSR:
1004 		return (1);
1005 
1006 	case OPL_CPU_SYNC_UE:
1007 		/*
1008 		 * The validity: SFSR_MK_UE bit has been checked
1009 		 * in opl_cpu_sync_error()
1010 		 * No more check is required.
1011 		 *
1012 		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
1013 		 * and they have been retrieved in cpu_queue_events()
1014 		 */
1015 
1016 		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
1017 			ASSERT(aflt->flt_in_memory);
1018 			/*
1019 			 * We want to skip logging only if ALL the following
1020 			 * conditions are true:
1021 			 *
1022 			 *	1. We are not panicing already.
1023 			 *	2. The error is a memory error.
1024 			 *	3. There is only one error.
1025 			 *	4. The error is on a retired page.
1026 			 *	5. The error occurred under on_trap
1027 			 *	protection AFLT_PROT_EC
1028 			 */
1029 			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
1030 			    page_retire_check(aflt->flt_addr, NULL) == 0) {
1031 				/*
1032 				 * Do not log an error from
1033 				 * the retired page
1034 				 */
1035 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
1036 				return (0);
1037 			}
1038 			if (!panicstr)
1039 				cpu_page_retire(opl_flt);
1040 		}
1041 		return (1);
1042 
1043 	case OPL_CPU_SYNC_OTHERS:
1044 		/*
1045 		 * For the following error cases, the processor HW does
1046 		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
1047 		 * to assign appropriate values here to reflect what we
1048 		 * think is the most likely cause of the problem w.r.t to
1049 		 * the particular error event.  For Buserr and timeout
1050 		 * error event, we will assign OPL_ERRID_CHANNEL as the
1051 		 * most likely reason.  For TLB parity or multiple hit
1052 		 * error events, we will assign the reason as
1053 		 * OPL_ERRID_CPU (cpu related problem) and set the
1054 		 * flt_eid_sid to point to the cpuid.
1055 		 */
1056 
1057 		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
1058 			/*
1059 			 * flt_eid_sid will not be used for this case.
1060 			 */
1061 			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
1062 		}
1063 		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
1064 			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1065 			opl_flt->flt_eid_sid = aflt->flt_inst;
1066 		}
1067 
1068 		/*
1069 		 * In case of no effective error bit
1070 		 */
1071 		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
1072 			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1073 			opl_flt->flt_eid_sid = aflt->flt_inst;
1074 		}
1075 		break;
1076 
1077 		default:
1078 			return (1);
1079 	}
1080 	return (1);
1081 }
1082 
1083 /*
1084  * Retire the bad page that may contain the flushed error.
1085  */
1086 void
1087 cpu_page_retire(opl_async_flt_t *opl_flt)
1088 {
1089 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1090 	(void) page_retire(aflt->flt_addr, PR_UE);
1091 }
1092 
1093 /*
1094  * Invoked by error_init() early in startup and therefore before
1095  * startup_errorq() is called to drain any error Q -
1096  *
1097  * startup()
1098  *   startup_end()
1099  *     error_init()
1100  *       cpu_error_init()
1101  * errorq_init()
1102  *   errorq_drain()
1103  * start_other_cpus()
1104  *
 * The purpose of this routine is to allocate the buffer that holds the
 * error logs; opl_cpu_reg_init() later points each cpu at its own part of
 * that buffer.  The system panics if the buffer is not page aligned.
1108  *
1109  */
1110 /*ARGSUSED*/
1111 void
1112 cpu_error_init(int items)
1113 {
1114 	opl_err_log = (opl_errlog_t *)
1115 	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
1116 	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
1117 		cmn_err(CE_PANIC, "The base address of the error log "
1118 		    "is not page aligned");
1119 }
1120 
1121 /*
1122  * We route all errors through a single switch statement.
1123  */
1124 void
1125 cpu_ue_log_err(struct async_flt *aflt)
1126 {
1127 	switch (aflt->flt_class) {
1128 	case CPU_FAULT:
1129 		if (cpu_sync_log_err(aflt))
1130 			cpu_ereport_post(aflt);
1131 		break;
1132 
1133 	case BUS_FAULT:
1134 		bus_async_log_err(aflt);
1135 		break;
1136 
1137 	default:
1138 		cmn_err(CE_WARN, "discarding async error %p with invalid "
1139 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1140 		return;
1141 	}
1142 }
1143 
1144 /*
1145  * Routine for panic hook callback from panic_idle().
1146  *
1147  * Nothing to do here.
1148  */
void
cpu_async_panic_callb(void)
{
	/* Intentionally empty: no work is needed on this platform. */
}
1153 
1154 /*
1155  * Routine to return a string identifying the physical name
1156  * associated with a memory/cache error.
1157  */
1158 /*ARGSUSED*/
1159 int
1160 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
1161     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
1162     ushort_t flt_status, char *buf, int buflen, int *lenp)
1163 {
1164 	int synd_code;
1165 	int ret;
1166 
1167 	/*
1168 	 * An AFSR of -1 defaults to a memory syndrome.
1169 	 */
1170 	synd_code = (int)flt_synd;
1171 
1172 	if (&plat_get_mem_unum) {
1173 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
1174 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
1175 			buf[0] = '\0';
1176 			*lenp = 0;
1177 		}
1178 		return (ret);
1179 	}
1180 	buf[0] = '\0';
1181 	*lenp = 0;
1182 	return (ENOTSUP);
1183 }
1184 
1185 /*
1186  * Wrapper for cpu_get_mem_unum() routine that takes an
1187  * async_flt struct rather than explicit arguments.
1188  */
1189 int
1190 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1191     char *buf, int buflen, int *lenp)
1192 {
1193 	/*
1194 	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
1195 	 * memory error.
1196 	 */
1197 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
1198 	    (uint64_t)-1,
1199 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
1200 	    aflt->flt_status, buf, buflen, lenp));
1201 }
1202 
1203 /*
1204  * This routine is a more generic interface to cpu_get_mem_unum()
1205  * that may be used by other modules (e.g. mm).
1206  */
1207 /*ARGSUSED*/
1208 int
1209 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1210     char *buf, int buflen, int *lenp)
1211 {
1212 	int synd_status, flt_in_memory, ret;
1213 	ushort_t flt_status = 0;
1214 	char unum[UNUM_NAMLEN];
1215 
1216 	/*
1217 	 * Check for an invalid address.
1218 	 */
1219 	if (afar == (uint64_t)-1)
1220 		return (ENXIO);
1221 
1222 	if (synd == (uint64_t)-1)
1223 		synd_status = AFLT_STAT_INVALID;
1224 	else
1225 		synd_status = AFLT_STAT_VALID;
1226 
1227 	flt_in_memory = (*afsr & SFSR_MEMORY) &&
1228 	    pf_is_memory(afar >> MMU_PAGESHIFT);
1229 
1230 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1231 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
1232 	if (ret != 0)
1233 		return (ret);
1234 
1235 	if (*lenp >= buflen)
1236 		return (ENAMETOOLONG);
1237 
1238 	(void) strncpy(buf, unum, buflen);
1239 
1240 	return (0);
1241 }
1242 
1243 /*
1244  * Routine to return memory information associated
1245  * with a physical address and syndrome.
1246  */
1247 /*ARGSUSED*/
1248 int
1249 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1250     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1251     int *segsp, int *banksp, int *mcidp)
1252 {
1253 	int synd_code = (int)synd;
1254 
1255 	if (afar == (uint64_t)-1)
1256 		return (ENXIO);
1257 
1258 	if (p2get_mem_info != NULL)
1259 		return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep,
1260 		    bank_sizep, segsp, banksp, mcidp));
1261 	else
1262 		return (ENOTSUP);
1263 }
1264 
1265 /*
1266  * Routine to return a string identifying the physical
1267  * name associated with a cpuid.
1268  */
1269 int
1270 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1271 {
1272 	int ret;
1273 	char unum[UNUM_NAMLEN];
1274 
1275 	if (&plat_get_cpu_unum) {
1276 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN,
1277 		    lenp)) != 0)
1278 			return (ret);
1279 	} else {
1280 		return (ENOTSUP);
1281 	}
1282 
1283 	if (*lenp >= buflen)
1284 		return (ENAMETOOLONG);
1285 
1286 	(void) strncpy(buf, unum, *lenp);
1287 
1288 	return (0);
1289 }
1290 
1291 /*
1292  * This routine exports the name buffer size.
1293  */
1294 size_t
1295 cpu_get_name_bufsize()
1296 {
1297 	return (UNUM_NAMLEN);
1298 }
1299 
1300 /*
1301  * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
1302  */
1303 void
1304 cpu_flush_ecache(void)
1305 {
1306 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
1307 	    cpunodes[CPU->cpu_id].ecache_linesize);
1308 }
1309 
1310 static uint8_t
1311 flt_to_trap_type(struct async_flt *aflt)
1312 {
1313 	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
1314 		return (TRAP_TYPE_ECC_I);
1315 	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
1316 		return (TRAP_TYPE_ECC_D);
1317 	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
1318 		return (TRAP_TYPE_URGENT);
1319 	return (TRAP_TYPE_UNKNOWN);
1320 }
1321 
1322 /*
1323  * Encode the data saved in the opl_async_flt_t struct into
1324  * the FM ereport payload.
1325  */
1326 /* ARGSUSED */
1327 static void
1328 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
1329 		nvlist_t *resource)
1330 {
1331 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
1332 	char unum[UNUM_NAMLEN];
1333 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1334 	int len;
1335 
1336 
1337 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
1338 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
1339 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1340 	}
1341 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
1342 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
1343 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
1344 	}
1345 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
1346 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
1347 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1348 	}
1349 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
1350 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
1351 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
1352 	}
1353 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
1354 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
1355 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
1356 	}
1357 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
1358 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
1359 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
1360 	}
1361 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
1362 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
1363 		    DATA_TYPE_BOOLEAN_VALUE,
1364 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
1365 	}
1366 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
1367 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
1368 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
1369 	}
1370 
1371 	switch (opl_flt->flt_eid_mod) {
1372 	case OPL_ERRID_CPU:
1373 		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1374 		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
1375 		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
1376 		    NULL, opl_flt->flt_eid_sid,
1377 		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf);
1378 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1379 		    DATA_TYPE_NVLIST, resource, NULL);
1380 		break;
1381 
1382 	case OPL_ERRID_CHANNEL:
1383 		/*
1384 		 * No resource is created but the cpumem DE will find
1385 		 * the defective path by retreiving EID from SFSR which is
1386 		 * included in the payload.
1387 		 */
1388 		break;
1389 
1390 	case OPL_ERRID_MEM:
1391 		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
1392 		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1393 		    unum, NULL, (uint64_t)-1);
1394 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1395 		    DATA_TYPE_NVLIST, resource, NULL);
1396 		break;
1397 
1398 	case OPL_ERRID_PATH:
1399 		/*
1400 		 * No resource is created but the cpumem DE will find
1401 		 * the defective path by retreiving EID from SFSR which is
1402 		 * included in the payload.
1403 		 */
1404 		break;
1405 	}
1406 }
1407 
1408 /*
1409  * Returns whether fault address is valid for this error bit and
1410  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
1411  */
1412 /*ARGSUSED*/
1413 static int
1414 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
1415 {
1416 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1417 
1418 	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
1419 		return ((t_afsr_bit & SFSR_MEMORY) &&
1420 		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
1421 	}
1422 	return (0);
1423 }
1424 
1425 /*
1426  * In OPL SCF does the stick synchronization.
1427  */
void
sticksync_slave(void)
{
	/* Intentionally empty: nothing for the slave cpu to do here. */
}
1432 
1433 /*
1434  * In OPL SCF does the stick synchronization.
1435  */
void
sticksync_master(void)
{
	/* Intentionally empty: nothing for the master cpu to do here. */
}
1440 
1441 /*
 * Cpu private uninitialization.  OPL cpus do not use the private area.
1443  */
1444 void
1445 cpu_uninit_private(struct cpu *cp)
1446 {
1447 	cmp_delete_cpu(cp->cpu_id);
1448 }
1449 
1450 /*
1451  * Always flush an entire cache.
1452  */
void
cpu_error_ecache_flush(void)
{
	/* No partial flush is attempted; delegate to the full flush. */
	cpu_flush_ecache();
}
1458 
1459 void
1460 cpu_ereport_post(struct async_flt *aflt)
1461 {
1462 	char *cpu_type, buf[FM_MAX_CLASS];
1463 	nv_alloc_t *nva = NULL;
1464 	nvlist_t *ereport, *detector, *resource;
1465 	errorq_elem_t *eqep;
1466 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1467 
1468 	if (aflt->flt_panic || panicstr) {
1469 		eqep = errorq_reserve(ereport_errorq);
1470 		if (eqep == NULL)
1471 			return;
1472 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
1473 		nva = errorq_elem_nva(ereport_errorq, eqep);
1474 	} else {
1475 		ereport = fm_nvlist_create(nva);
1476 	}
1477 
1478 	/*
1479 	 * Create the scheme "cpu" FMRI.
1480 	 */
1481 	detector = fm_nvlist_create(nva);
1482 	resource = fm_nvlist_create(nva);
1483 	switch (cpunodes[aflt->flt_inst].implementation) {
1484 	case OLYMPUS_C_IMPL:
1485 		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
1486 		break;
1487 	case JUPITER_IMPL:
1488 		cpu_type = FM_EREPORT_CPU_SPARC64_VII;
1489 		break;
1490 	default:
1491 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
1492 		break;
1493 	}
1494 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1495 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
1496 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
1497 	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
1498 	    sbuf);
1499 
1500 	/*
1501 	 * Encode all the common data into the ereport.
1502 	 */
1503 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
1504 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
1505 
1506 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
1507 	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
1508 
1509 	/*
1510 	 * Encode the error specific data that was saved in
1511 	 * the async_flt structure into the ereport.
1512 	 */
1513 	cpu_payload_add_aflt(aflt, ereport, resource);
1514 
1515 	if (aflt->flt_panic || panicstr) {
1516 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1517 	} else {
1518 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1519 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1520 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1521 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1522 	}
1523 }
1524 
1525 void
1526 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
1527 {
1528 	int status;
1529 	ddi_fm_error_t de;
1530 
1531 	bzero(&de, sizeof (ddi_fm_error_t));
1532 
1533 	de.fme_version = DDI_FME_VERSION;
1534 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
1535 	de.fme_flag = expected;
1536 	de.fme_bus_specific = (void *)aflt->flt_addr;
1537 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
1538 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
1539 		aflt->flt_panic = 1;
1540 }
1541 
1542 void
1543 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
1544     errorq_t *eqp, uint_t flag)
1545 {
1546 	struct async_flt *aflt = (struct async_flt *)payload;
1547 
1548 	aflt->flt_erpt_class = error_class;
1549 	errorq_dispatch(eqp, payload, payload_sz, flag);
1550 }
1551 
1552 void
1553 adjust_hw_copy_limits(int ecache_size)
1554 {
1555 	/*
1556 	 * Set hw copy limits.
1557 	 *
1558 	 * /etc/system will be parsed later and can override one or more
1559 	 * of these settings.
1560 	 *
1561 	 * At this time, ecache size seems only mildly relevant.
1562 	 * We seem to run into issues with the d-cache and stalls
1563 	 * we see on misses.
1564 	 *
1565 	 * Cycle measurement indicates that 2 byte aligned copies fare
1566 	 * little better than doing things with VIS at around 512 bytes.
1567 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
1568 	 * aligned is faster whenever the source and destination data
1569 	 * in cache and the total size is less than 2 Kbytes.  The 2K
1570 	 * limit seems to be driven by the 2K write cache.
1571 	 * When more than 2K of copies are done in non-VIS mode, stores
1572 	 * backup in the write cache.  In VIS mode, the write cache is
1573 	 * bypassed, allowing faster cache-line writes aligned on cache
1574 	 * boundaries.
1575 	 *
1576 	 * In addition, in non-VIS mode, there is no prefetching, so
1577 	 * for larger copies, the advantage of prefetching to avoid even
1578 	 * occasional cache misses is enough to justify using the VIS code.
1579 	 *
1580 	 * During testing, it was discovered that netbench ran 3% slower
1581 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
1582 	 * applications, data is only used once (copied to the output
1583 	 * buffer, then copied by the network device off the system).  Using
1584 	 * the VIS copy saves more L2 cache state.  Network copies are
1585 	 * around 1.3K to 1.5K in size for historical reasons.
1586 	 *
1587 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
1588 	 * aligned copy even for large caches and 8 MB ecache.  The
1589 	 * infrastructure to allow different limits for different sized
1590 	 * caches is kept to allow further tuning in later releases.
1591 	 */
1592 
1593 	if (min_ecache_size == 0 && use_hw_bcopy) {
1594 		/*
1595 		 * First time through - should be before /etc/system
1596 		 * is read.
1597 		 * Could skip the checks for zero but this lets us
1598 		 * preserve any debugger rewrites.
1599 		 */
1600 		if (hw_copy_limit_1 == 0) {
1601 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
1602 			priv_hcl_1 = hw_copy_limit_1;
1603 		}
1604 		if (hw_copy_limit_2 == 0) {
1605 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
1606 			priv_hcl_2 = hw_copy_limit_2;
1607 		}
1608 		if (hw_copy_limit_4 == 0) {
1609 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
1610 			priv_hcl_4 = hw_copy_limit_4;
1611 		}
1612 		if (hw_copy_limit_8 == 0) {
1613 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
1614 			priv_hcl_8 = hw_copy_limit_8;
1615 		}
1616 		min_ecache_size = ecache_size;
1617 	} else {
1618 		/*
1619 		 * MP initialization. Called *after* /etc/system has
1620 		 * been parsed. One CPU has already been initialized.
1621 		 * Need to cater for /etc/system having scragged one
1622 		 * of our values.
1623 		 */
1624 		if (ecache_size == min_ecache_size) {
1625 			/*
1626 			 * Same size ecache. We do nothing unless we
1627 			 * have a pessimistic ecache setting. In that
1628 			 * case we become more optimistic (if the cache is
1629 			 * large enough).
1630 			 */
1631 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
1632 				/*
1633 				 * Need to adjust hw_copy_limit* from our
1634 				 * pessimistic uniprocessor value to a more
1635 				 * optimistic UP value *iff* it hasn't been
1636 				 * reset.
1637 				 */
1638 				if ((ecache_size > 1048576) &&
1639 				    (priv_hcl_8 == hw_copy_limit_8)) {
1640 					if (ecache_size <= 2097152)
1641 						hw_copy_limit_8 = 4 *
1642 						    VIS_COPY_THRESHOLD;
1643 					else if (ecache_size <= 4194304)
1644 						hw_copy_limit_8 = 4 *
1645 						    VIS_COPY_THRESHOLD;
1646 					else
1647 						hw_copy_limit_8 = 4 *
1648 						    VIS_COPY_THRESHOLD;
1649 					priv_hcl_8 = hw_copy_limit_8;
1650 				}
1651 			}
1652 		} else if (ecache_size < min_ecache_size) {
1653 			/*
1654 			 * A different ecache size. Can this even happen?
1655 			 */
1656 			if (priv_hcl_8 == hw_copy_limit_8) {
1657 				/*
1658 				 * The previous value that we set
1659 				 * is unchanged (i.e., it hasn't been
1660 				 * scragged by /etc/system). Rewrite it.
1661 				 */
1662 				if (ecache_size <= 1048576)
1663 					hw_copy_limit_8 = 8 *
1664 					    VIS_COPY_THRESHOLD;
1665 				else if (ecache_size <= 2097152)
1666 					hw_copy_limit_8 = 8 *
1667 					    VIS_COPY_THRESHOLD;
1668 				else if (ecache_size <= 4194304)
1669 					hw_copy_limit_8 = 8 *
1670 					    VIS_COPY_THRESHOLD;
1671 				else
1672 					hw_copy_limit_8 = 10 *
1673 					    VIS_COPY_THRESHOLD;
1674 				priv_hcl_8 = hw_copy_limit_8;
1675 				min_ecache_size = ecache_size;
1676 			}
1677 		}
1678 	}
1679 }
1680 
1681 #define	VIS_BLOCKSIZE		64
1682 
1683 int
1684 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1685 {
1686 	int ret, watched;
1687 
1688 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1689 	ret = dtrace_blksuword32(addr, data, 0);
1690 	if (watched)
1691 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1692 
1693 	return (ret);
1694 }
1695 
1696 void
1697 opl_cpu_reg_init()
1698 {
1699 	uint64_t	this_cpu_log;
1700 
1701 	/*
1702 	 * We do not need to re-initialize cpu0 registers.
1703 	 */
1704 	if (cpu[getprocessorid()] == &cpu0)
1705 		return;
1706 
1707 	/*
1708 	 * Initialize Error log Scratch register for error handling.
1709 	 */
1710 
1711 	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
1712 	    ERRLOG_BUFSZ * (getprocessorid())));
1713 	opl_error_setup(this_cpu_log);
1714 
1715 	/*
1716 	 * Enable MMU translating multiple page sizes for
1717 	 * sITLB and sDTLB.
1718 	 */
1719 	opl_mpg_enable();
1720 }
1721 
1722 /*
1723  * Queue one event in ue_queue based on ecc_type_to_info entry.
1724  */
1725 static void
1726 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
1727     ecc_type_to_info_t *eccp)
1728 {
1729 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1730 
1731 	if (reason &&
1732 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
1733 		(void) strcat(reason, eccp->ec_reason);
1734 	}
1735 
1736 	opl_flt->flt_bit = eccp->ec_afsr_bit;
1737 	opl_flt->flt_type = eccp->ec_flt_type;
1738 	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
1739 	aflt->flt_payload = eccp->ec_err_payload;
1740 
1741 	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
1742 	cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt,
1743 	    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
1744 }
1745 
1746 /*
1747  * Queue events on async event queue one event per error bit.
1748  * Return number of events queued.
1749  */
1750 int
1751 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
1752 {
1753 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1754 	ecc_type_to_info_t *eccp;
1755 	int nevents = 0;
1756 
1757 	/*
1758 	 * Queue expected errors, error bit and fault type must must match
1759 	 * in the ecc_type_to_info table.
1760 	 */
1761 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
1762 	    eccp++) {
1763 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
1764 		    (eccp->ec_flags & aflt->flt_status) != 0) {
1765 			/*
1766 			 * UE error event can be further
1767 			 * classified/breakdown into finer granularity
1768 			 * based on the flt_eid_mod value set by HW.  We do
1769 			 * special handling here so that we can report UE
1770 			 * error in finer granularity as ue_mem,
1771 			 * ue_channel, ue_cpu or ue_path.
1772 			 */
1773 			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
1774 				opl_flt->flt_eid_mod = (aflt->flt_stat &
1775 				    SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT;
1776 				opl_flt->flt_eid_sid = (aflt->flt_stat &
1777 				    SFSR_EID_SID) >> SFSR_EID_SID_SHIFT;
1778 				/*
1779 				 * Need to advance eccp pointer by flt_eid_mod
1780 				 * so that we get an appropriate ecc pointer
1781 				 *
1782 				 * EID			# of advances
1783 				 * ----------------------------------
1784 				 * OPL_ERRID_MEM	0
1785 				 * OPL_ERRID_CHANNEL	1
1786 				 * OPL_ERRID_CPU	2
1787 				 * OPL_ERRID_PATH	3
1788 				 */
1789 				eccp += opl_flt->flt_eid_mod;
1790 			}
1791 			cpu_queue_one_event(opl_flt, reason, eccp);
1792 			t_afsr_errs &= ~eccp->ec_afsr_bit;
1793 			nevents++;
1794 		}
1795 	}
1796 
1797 	return (nevents);
1798 }
1799 
1800 /*
1801  * Sync. error wrapper functions.
1802  * We use these functions in order to transfer here from the
1803  * nucleus trap handler information about trap type (data or
1804  * instruction) and trap level (0 or above 0). This way we
1805  * get rid of using SFSR's reserved bits.
1806  */
1807 
1808 #define	OPL_SYNC_TL0	0
1809 #define	OPL_SYNC_TL1	1
1810 #define	OPL_ISYNC_ERR	0
1811 #define	OPL_DSYNC_ERR	1
1812 
1813 void
1814 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1815 {
1816 	uint64_t t_sfar = p_sfar;
1817 	uint64_t t_sfsr = p_sfsr;
1818 
1819 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1820 	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
1821 }
1822 
1823 void
1824 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1825 {
1826 	uint64_t t_sfar = p_sfar;
1827 	uint64_t t_sfsr = p_sfsr;
1828 
1829 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1830 	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
1831 }
1832 
1833 void
1834 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1835 {
1836 	uint64_t t_sfar = p_sfar;
1837 	uint64_t t_sfsr = p_sfsr;
1838 
1839 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1840 	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
1841 }
1842 
1843 void
1844 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1845 {
1846 	uint64_t t_sfar = p_sfar;
1847 	uint64_t t_sfsr = p_sfsr;
1848 
1849 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1850 	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
1851 }
1852 
1853 /*
1854  * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
1855  * and TLB_PRT.
1856  * This function is designed based on cpu_deferred_error().
1857  */
1858 
/*
 * Common handler for synchronous error traps.
 *
 *   rp     - saved registers of the trapped context; r_pc/r_npc (and r_g1
 *            for t_lofault) are rewritten when we trampoline.
 *   t_sfar - fault address, recorded as flt_addr.
 *   t_sfsr - fault status, recorded as flt_stat; a local copy has the UE
 *            bits cleared when the UE indication is not valid.
 *   tl     - OPL_SYNC_TL0 or OPL_SYNC_TL1; a non-zero value forces a panic.
 *   derr   - non-zero for a data sync error, zero for an instruction one.
 *
 * The function builds an opl_async_flt_t, decides between recovery
 * (trampoline / AST) and panic, queues the error events and, if
 * flt_panic ends up set, calls fm_panic().
 */
static void
opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
    uint_t tl, uint_t derr)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	uint64_t log_sfsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * handle the specific error
	 */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = t_sfsr;
	aflt->flt_addr = t_sfar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	/* Privileged if taken at TL>0 or if TSTATE says so. */
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
	    TSTATE_PRIV) ? 1 : 0));
	aflt->flt_tl = (uchar_t)tl;
	/* TL>0, the aft_testfatal tunable and TLB errors are always fatal. */
	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;

	/*
	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
	 * So, clear all error bits to avoid mis-handling and force the system
	 * panicked.
	 * We skip all the procedures below down to the panic message call.
	 */
	if (!(t_sfsr & SFSR_FV)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_panic = 1;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
		fm_panic("%sErrors(s)", "invalid SFSR");
	}

	/*
	 * If either UE and MK bit is off, this is not valid UE error.
	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
	 * mis-handling below.
	 * aflt->flt_stat keeps the original bits as a reference.
	 */
	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
	    (SFSR_MK_UE|SFSR_UE)) {
		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
	}

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
				otp->ot_trap |= (ushort_t)OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			/*
			 * Checked after OT_DATA_EC, so when both protections
			 * apply flt_prot ends up as AFLT_PROT_ACCESS.
			 */
			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			/* Protected copy: resume at t_lofault with EFAULT in %g1. */
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process
	 * or we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined.  If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.
	 */
	log_sfsr = t_sfsr;
	if (trampolined) {
		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	}

	/*
	 * Queue the events.  An invalid-SFSR event is queued instead when
	 * loggable error bits are present but none matched the table, or
	 * when no error bit is set at all.  Note that log_sfsr only gates
	 * the call; cpu_queue_events() is handed t_sfsr.
	 */
	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	/*
	 * Let the registered bus error handlers see UE, TO and BERR; this
	 * may set flt_panic for an unprotected access judged fatal.
	 */
	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		if (pr_reason[0] == 0)
			strcpy(pr_reason, "invalid SFSR ");

		fm_panic("%sErrors(s)", pr_reason);
	}

	/*
	 * If we queued an error and we are going to return from the trap and
	 * the error was in user mode or inside of a copy routine, set AST flag
	 * so the queue will be drained before returning to user mode.  The
	 * AST processing will also act on our failure policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_sfsr & SFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_sfsr & SFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}
2054 
2055 /*ARGSUSED*/
2056 void
2057 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
2058 {
2059 	opl_async_flt_t opl_flt;
2060 	struct async_flt *aflt;
2061 	char pr_reason[MAX_REASON_STRING];
2062 
2063 	/* normalize tl */
2064 	tl = (tl >= 2 ? 1 : 0);
2065 	pr_reason[0] = '\0';
2066 
2067 	bzero(&opl_flt, sizeof (opl_async_flt_t));
2068 	aflt = (struct async_flt *)&opl_flt;
2069 	aflt->flt_id = gethrtime_waitfree();
2070 	aflt->flt_bus_id = getprocessorid();
2071 	aflt->flt_inst = CPU->cpu_id;
2072 	aflt->flt_stat = p_ugesr;
2073 	aflt->flt_pc = (caddr_t)rp->r_pc;
2074 	aflt->flt_class = (uchar_t)CPU_FAULT;
2075 	aflt->flt_tl = tl;
2076 	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
2077 	    1 : 0));
2078 	aflt->flt_status = OPL_ECC_URGENT_TRAP;
2079 	aflt->flt_panic = 1;
2080 	/*
2081 	 * HW does not set mod/sid in case of urgent error.
2082 	 * So we have to set it here.
2083 	 */
2084 	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
2085 	opl_flt.flt_eid_sid = aflt->flt_inst;
2086 
2087 	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
2088 		opl_flt.flt_type = OPL_CPU_INV_UGESR;
2089 		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
2090 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
2091 		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
2092 	}
2093 
2094 	fm_panic("Urgent Error");
2095 }
2096 
2097 /*
2098  * Initialization error counters resetting.
2099  */
2100 /* ARGSUSED */
2101 static void
2102 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
2103 {
2104 	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
2105 	hdlr->cyh_level = CY_LOW_LEVEL;
2106 	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
2107 
2108 	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
2109 	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
2110 }
2111 
2112 void
2113 cpu_mp_init(void)
2114 {
2115 	cyc_omni_handler_t hdlr;
2116 
2117 	hdlr.cyo_online = opl_ras_online;
2118 	hdlr.cyo_offline = NULL;
2119 	hdlr.cyo_arg = NULL;
2120 	mutex_enter(&cpu_lock);
2121 	(void) cyclic_add_omni(&hdlr);
2122 	mutex_exit(&cpu_lock);
2123 }
2124 
2125 int heaplp_use_stlb = 0;
2126 
2127 void
2128 mmu_init_kernel_pgsz(struct hat *hat)
2129 {
2130 	uint_t tte = page_szc(segkmem_lpsize);
2131 	uchar_t new_cext_primary, new_cext_nucleus;
2132 
2133 	if (heaplp_use_stlb == 0) {
2134 		/* do not reprogram stlb */
2135 		tte = TTE8K;
2136 	}
2137 
2138 	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
2139 	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);
2140 
2141 	hat->sfmmu_cext = new_cext_primary;
2142 	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
2143 	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
2144 }
2145 
2146 size_t
2147 mmu_get_kernel_lpsize(size_t lpsize)
2148 {
2149 	uint_t tte;
2150 
2151 	if (lpsize == 0) {
2152 		/* no setting for segkmem_lpsize in /etc/system: use default */
2153 		return (MMU_PAGESIZE4M);
2154 	}
2155 
2156 	for (tte = TTE8K; tte <= TTE4M; tte++) {
2157 		if (lpsize == TTEBYTES(tte))
2158 			return (lpsize);
2159 	}
2160 
2161 	return (TTEBYTES(TTE8K));
2162 }
2163 
/*
 * The following functions are unused in the OPL cpu module.
 * They are defined here only to resolve dependencies in the
 * "unix" module.  Unused functions that should never be
 * called on OPL are coded with ASSERT(0).
 */
2171 
/*
 * Intentionally empty in the OPL cpu module.
 */
void
cpu_disable_errors(void)
{
}
2175 
/*
 * Not expected to be called on OPL; asserts if reached.
 */
void
cpu_enable_errors(void)
{
	ASSERT(0);
}
2179 
2180 /*ARGSUSED*/
2181 void
2182 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
2183 { ASSERT(0); }
2184 
/*
 * Intentionally empty in the OPL cpu module; cp is ignored.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}
2189 
/*
 * Intentionally empty in the OPL cpu module; cp is ignored.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}
2194 
/*
 * Intentionally empty in the OPL cpu module; aflt is ignored.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
2199 
2200 /*ARGSUSED*/
2201 void
2202 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
2203 { ASSERT(0); }
2204 
2205 /*ARGSUSED*/
2206 void
2207 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
2208 { ASSERT(0); }
2209 
/*
 * Not expected to be called on OPL; asserts if reached.
 * All arguments are ignored.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	ASSERT(0);
}
2214 
/*
 * Intentionally empty in the OPL cpu module; cp is ignored.
 */
/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
}
2219 
/*
 * Intentionally empty in the OPL cpu module; cp is ignored.
 */
/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
}
2224 
/*
 * Not expected to be called on OPL; asserts if reached.
 * Both arguments are ignored.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t divisor, uint64_t arg2)
{
	ASSERT(0);
}
2229 
/*
 * Intentionally empty in the OPL cpu module.
 */
void
cpu_init_cache_scrub(void)
{
}
2233 
2234 /* ARGSUSED */
2235 int
2236 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
2237 {
2238 	if (&plat_get_mem_sid) {
2239 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
2240 	} else {
2241 		return (ENOTSUP);
2242 	}
2243 }
2244 
2245 /* ARGSUSED */
2246 int
2247 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
2248 {
2249 	if (&plat_get_mem_addr) {
2250 		return (plat_get_mem_addr(unum, sid, offset, addrp));
2251 	} else {
2252 		return (ENOTSUP);
2253 	}
2254 }
2255 
2256 /* ARGSUSED */
2257 int
2258 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
2259 {
2260 	if (&plat_get_mem_offset) {
2261 		return (plat_get_mem_offset(flt_addr, offp));
2262 	} else {
2263 		return (ENOTSUP);
2264 	}
2265 }
2266 
2267 /*ARGSUSED*/
2268 void
2269 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2270 { ASSERT(0); }
2271 
2272 /*ARGSUSED*/
2273 void
2274 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2275 { ASSERT(0); }
2276 
/*
 * Not expected to be called on OPL; asserts if reached.
 * All arguments are ignored.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
	ASSERT(0);
}
2281 
2282 /*ARGSUSED*/
2283 int
2284 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
2285     errorq_elem_t *eqep, size_t afltoffset)
2286 {
2287 	ASSERT(0);
2288 	return (0);
2289 }
2290 
/*
 * Not expected to be called on OPL; asserts if reached.
 * aflt is ignored and NULL is returned.
 */
/*ARGSUSED*/
char *
flt_to_error_type(struct async_flt *aflt)
{
	ASSERT(0);

	/* reached only if the assertion above does not stop execution */
	return (NULL);
}
2298