xref: /titanic_51/usr/src/uts/sun4u/cpu/opl_olympus.c (revision 85298dc181bc16dc378c47f19f3785559903a8b9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII).
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/ddi.h>
35 #include <sys/sysmacros.h>
36 #include <sys/archsystm.h>
37 #include <sys/vmsystm.h>
38 #include <sys/machparam.h>
39 #include <sys/machsystm.h>
40 #include <sys/machthread.h>
41 #include <sys/cpu.h>
42 #include <sys/cmp.h>
43 #include <sys/elf_SPARC.h>
44 #include <vm/vm_dep.h>
45 #include <vm/hat_sfmmu.h>
46 #include <vm/seg_kpm.h>
47 #include <vm/seg_kmem.h>
48 #include <sys/cpuvar.h>
49 #include <sys/opl_olympus_regs.h>
50 #include <sys/opl_module.h>
51 #include <sys/async.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/dditypes.h>
55 #include <sys/cpu_module.h>
56 #include <sys/sysmacros.h>
57 #include <sys/intreg.h>
58 #include <sys/clock.h>
59 #include <sys/platform_module.h>
60 #include <sys/ontrap.h>
61 #include <sys/panic.h>
62 #include <sys/memlist.h>
63 #include <sys/ndifm.h>
64 #include <sys/ddifm.h>
65 #include <sys/fm/protocol.h>
66 #include <sys/fm/util.h>
67 #include <sys/fm/cpu/SPARC64-VI.h>
68 #include <sys/dtrace.h>
69 #include <sys/watchpoint.h>
70 #include <sys/promif.h>
71 
72 /*
73  * Internal functions.
74  */
75 static int cpu_sync_log_err(void *flt);
76 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
77 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
78 static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
79 static int prom_SPARC64VII_support_enabled(void);
80 
81 /*
82  * Error counters resetting interval.
83  */
84 static int opl_async_check_interval = 60;		/* 1 min */
85 
86 uint_t cpu_impl_dual_pgsz = 1;
87 
88 /*
89  * PA[22:0] represent Displacement in Jupiter
90  * configuration space.
91  */
92 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
93 
94 /*
95  * set in /etc/system to control logging of user BERR/TO's
96  */
97 int cpu_berr_to_verbose = 0;
98 
99 /*
100  * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
101  */
102 int cpu_alljupiter = 0;
103 
104 static int min_ecache_size;
105 static uint_t priv_hcl_1;
106 static uint_t priv_hcl_2;
107 static uint_t priv_hcl_4;
108 static uint_t priv_hcl_8;
109 
110 /*
111  * Olympus error log
112  */
113 static opl_errlog_t	*opl_err_log;
114 
115 /*
116  * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
117  * No any other ecc_type_info insertion is allowed in between the following
118  * four UE classess.
119  */
120 ecc_type_to_info_t ecc_type_to_info[] = {
121 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
122 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
123 	FM_EREPORT_CPU_UE_MEM,
124 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
125 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
126 	FM_EREPORT_CPU_UE_CHANNEL,
127 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
128 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
129 	FM_EREPORT_CPU_UE_CPU,
130 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
131 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
132 	FM_EREPORT_CPU_UE_PATH,
133 	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
134 	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
135 	FM_EREPORT_CPU_BERR,
136 	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
137 	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
138 	FM_EREPORT_CPU_BTO,
139 	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
140 	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
141 	FM_EREPORT_CPU_MTLB,
142 	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
143 	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
144 	FM_EREPORT_CPU_TLBP,
145 
146 	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
147 	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
148 	FM_EREPORT_CPU_CRE,
149 	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
150 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
151 	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
152 	FM_EREPORT_CPU_TSBCTX,
153 	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
154 	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
155 	FM_EREPORT_CPU_TSBP,
156 	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
157 	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
158 	FM_EREPORT_CPU_PSTATE,
159 	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
160 	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
161 	FM_EREPORT_CPU_TSTATE,
162 	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
163 	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
164 	FM_EREPORT_CPU_IUG_F,
165 	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
166 	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
167 	FM_EREPORT_CPU_IUG_R,
168 	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
169 	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
170 	FM_EREPORT_CPU_SDC,
171 	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
172 	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
173 	FM_EREPORT_CPU_WDT,
174 	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
175 	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
176 	FM_EREPORT_CPU_DTLB,
177 	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
178 	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
179 	FM_EREPORT_CPU_ITLB,
180 	UGESR_IUG_COREERR, "IUG_COREERR",
181 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
182 	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
183 	FM_EREPORT_CPU_CORE,
184 	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
185 	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
186 	FM_EREPORT_CPU_DAE,
187 	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
188 	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
189 	FM_EREPORT_CPU_IAE,
190 	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
191 	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
192 	FM_EREPORT_CPU_UGE,
193 	0,		NULL,		0,		0,
194 	NULL,  0,	   0,
195 };
196 
197 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
198 		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
199 		int *segsp, int *banksp, int *mcidp);
200 
201 
202 /*
203  * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
 *
 * Six OPL_SET_TRAP() invocations are made, each naming a trap entry and
 * the handler installed for it:
 *	tt0_iae, tt1_iae, tt0_dae, tt1_dae	-> opl_serr_instr
 *	tt0_asdat, tt1_asdat			-> opl_ugerr_instr
 *
 * NOTE(review): the tt0_/tt1_ prefixes presumably select the TL=0 and
 * TL>0 trap tables, and iae/dae/asdat presumably correspond to the three
 * trap types listed above -- confirm against the OPL_SET_TRAP definition.
204  */
205 void
206 cpu_init_trap(void)
207 {
208 	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
209 	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
210 	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
211 	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
212 	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
213 	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
214 }
215 
216 static int
217 getintprop(pnode_t node, char *name, int deflt)
218 {
219 	int	value;
220 
221 	switch (prom_getproplen(node, name)) {
222 	case sizeof (int):
223 		(void) prom_getprop(node, name, (caddr_t)&value);
224 		break;
225 
226 	default:
227 		value = deflt;
228 		break;
229 	}
230 
231 	return (value);
232 }
233 
234 /*
235  * Set the magic constants of the implementation.
236  */
237 /*ARGSUSED*/
238 void
239 cpu_fiximp(pnode_t dnode)
240 {
241 	int i, a;
242 	extern int vac_size, vac_shift;
243 	extern uint_t vac_mask;
244 
245 	static struct {
246 		char	*name;
247 		int	*var;
248 		int	defval;
249 	} prop[] = {
250 		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
251 		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
252 		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
253 		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
254 		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
255 		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
256 		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
257 	};
258 
259 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
260 		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
261 
262 	ecache_setsize = ecache_size / ecache_associativity;
263 
264 	vac_size = OPL_VAC_SIZE;
265 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
266 	i = 0; a = vac_size;
267 	while (a >>= 1)
268 		++i;
269 	vac_shift = i;
270 	shm_alignment = vac_size;
271 	vac = 1;
272 }
273 
274 /*
275  * Enable features for Jupiter-only domains.
276  */
277 void
278 cpu_fix_alljupiter(void)
279 {
280 	if (!prom_SPARC64VII_support_enabled()) {
281 		/*
282 		 * Do not enable all-Jupiter features and do not turn on
283 		 * the cpu_alljupiter flag.
284 		 */
285 		return;
286 	}
287 
288 	cpu_alljupiter = 1;
289 
290 	/*
291 	 * Enable ima hwcap for Jupiter-only domains.  DR will prevent
292 	 * addition of Olympus-C to all-Jupiter domains to preserve ima
293 	 * hwcap semantics.
294 	 */
295 	cpu_hwcap_flags |= AV_SPARC_IMA;
296 }
297 
298 #ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
299 /*
300  * Quick and dirty way to locally redefine, for
301  * OPL, the value of IDSR_BN_SETS to 31 instead
302  * of the standard value of 32.  This works around a
303  * problem in REV_B of the Olympus-C processor when
304  * handling more than 31 xcall broadcasts.
305  */
306 #undef	IDSR_BN_SETS
307 #define	IDSR_BN_SETS    31
308 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
309 
/*
 * Send a mondo to every CPU in 'set' and wait until getidsr() reads zero
 * (and, when NCPU > IDSR_BN_SETS, until every CPU in the set has been
 * shipped to).
 *
 * At most IDSR_BN_SETS dispatches are outstanding at once; cpuids[] records
 * which CPU was shipped with each index, and nackmask/busymask hold the
 * IDSR NACK/BUSY bits for the indexes in use.  When NCPU > IDSR_BN_SETS the
 * CPUs that did not fit in the first pass are shipped later, into indexes
 * whose NACK and BUSY bits are both clear, walking the set from the
 * highest-numbered CPU downwards.  Indexes whose NACK bit is set are
 * re-shipped after a short delay; while any BUSY bit of interest is set the
 * loop simply polls again.
 *
 * If the wait exceeds xc_tick_limit ticks the function panics, unless
 * panic_quiesce is set, in which case it returns silently.
 *
 * 'set' must not be empty (asserted).  The function works on its own copy
 * of the set, since 'set' is passed by value.
 */
310 void
311 send_mondo_set(cpuset_t set)
312 {
313 	int lo, busy, nack, shipped = 0;
314 	uint16_t i, cpuids[IDSR_BN_SETS];
315 	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
316 	uint64_t starttick, endtick, tick, lasttick;
317 #if (NCPU > IDSR_BN_SETS)
318 	int index = 0;
319 	int ncpuids = 0;
320 #endif
321 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
322 	int bn_sets = IDSR_BN_SETS;
323 	uint64_t ver;
324 
325 	ASSERT(NCPU > bn_sets);
326 #endif
327 
328 	ASSERT(!CPUSET_ISNULL(set));
329 	starttick = lasttick = gettick();
330 
331 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
	/*
	 * On an Olympus-C whose revision field reads OLYMPUS_C_A, allow
	 * only one outstanding dispatch at a time.
	 */
332 	ver = ultra_getver();
333 	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
334 	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
335 		bn_sets = 1;
336 #endif
337 
338 #if (NCPU <= IDSR_BN_SETS)
	/* Every CPU fits in one pass: ship to each member of the set. */
339 	for (i = 0; i < NCPU; i++)
340 		if (CPU_IN_SET(set, i)) {
341 			shipit(i, shipped);
342 			nackmask |= IDSR_NACK_BIT(shipped);
343 			cpuids[shipped++] = i;
344 			CPUSET_DEL(set, i);
345 			if (CPUSET_ISNULL(set))
346 				break;
347 		}
348 	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
349 #else
	/*
	 * Count every member of the set in ncpuids, but ship only to as
	 * many as there are dispatch indexes; the rest stay in 'set'.
	 */
350 	for (i = 0; i < NCPU; i++)
351 		if (CPU_IN_SET(set, i)) {
352 			ncpuids++;
353 
354 			/*
355 			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
356 			 * find we have shipped to more than (IDSR_BN_SETS)
357 			 * CPUs, set "index" to the highest numbered CPU in
358 			 * the set so we can ship to other CPUs a bit later on.
359 			 */
360 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
361 			if (shipped < bn_sets) {
362 #else
363 			if (shipped < IDSR_BN_SETS) {
364 #endif
365 				shipit(i, shipped);
366 				nackmask |= IDSR_NACK_BIT(shipped);
367 				cpuids[shipped++] = i;
368 				CPUSET_DEL(set, i);
369 				if (CPUSET_ISNULL(set))
370 					break;
371 			} else
372 				index = (int)i;
373 		}
374 
375 	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
376 #endif
377 
	/* Poll the IDSR until all dispatches have completed. */
378 	busymask = IDSR_NACK_TO_BUSY(nackmask);
379 	busy = nack = 0;
380 	endtick = starttick + xc_tick_limit;
381 	for (;;) {
382 		idsr = getidsr();
383 #if (NCPU <= IDSR_BN_SETS)
384 		if (idsr == 0)
385 			break;
386 #else
387 		if (idsr == 0 && shipped == ncpuids)
388 			break;
389 #endif
390 		tick = gettick();
391 		/*
392 		 * If there is a big jump between the current tick
393 		 * count and lasttick, we have probably hit a break
394 		 * point.  Adjust endtick accordingly to avoid panic.
395 		 */
396 		if (tick > (lasttick + xc_tick_jump_limit))
397 			endtick += (tick - lasttick);
398 		lasttick = tick;
399 		if (tick > endtick) {
400 			if (panic_quiesce)
401 				return;
			/* Report the CPUs whose NACK or BUSY bit is set. */
402 			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
403 			    "BUSY]\nIDSR 0x%" PRIx64 "  cpuids:",
404 			    nack, busy, idsr);
405 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
406 			for (i = 0; i < bn_sets; i++) {
407 #else
408 			for (i = 0; i < IDSR_BN_SETS; i++) {
409 #endif
410 				if (idsr & (IDSR_NACK_BIT(i) |
411 				    IDSR_BUSY_BIT(i))) {
412 					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
413 				}
414 			}
415 			cmn_err(CE_CONT, "\n");
416 			cmn_err(CE_PANIC, "send_mondo_set: timeout");
417 		}
418 		curnack = idsr & nackmask;
419 		curbusy = idsr & busymask;
420 
421 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
422 		/*
423 		 * Only proceed to send more xcalls if all the
424 		 * cpus in the previous IDSR_BN_SETS were completed.
425 		 */
426 		if (curbusy) {
427 			busy++;
428 			continue;
429 		}
430 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
431 
432 #if (NCPU > IDSR_BN_SETS)
433 		if (shipped < ncpuids) {
434 			uint64_t cpus_left;
435 			uint16_t next = (uint16_t)index;
436 
			/*
			 * cpus_left holds the BUSY-bit positions of the
			 * indexes in use whose NACK and BUSY bits are both
			 * clear, i.e. the indexes that can be reused.
			 */
437 			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
438 			    busymask;
439 
440 			if (cpus_left) {
441 				do {
442 					/*
443 					 * Sequence through and ship to the
444 					 * remainder of the CPUs in the system
445 					 * (e.g. other than the first
446 					 * (IDSR_BN_SETS)) in reverse order.
447 					 */
448 					lo = lowbit(cpus_left) - 1;
449 					i = IDSR_BUSY_IDX(lo);
450 					shipit(next, i);
451 					shipped++;
452 					cpuids[i] = next;
453 
454 					/*
455 					 * If we've processed all the CPUs,
456 					 * exit the loop now and save
457 					 * instructions.
458 					 */
459 					if (shipped == ncpuids)
460 						break;
461 
					/* Find the next lower CPU in 'set'. */
462 					for ((index = ((int)next - 1));
463 					    index >= 0; index--)
464 						if (CPU_IN_SET(set, index)) {
465 							next = (uint16_t)index;
466 							break;
467 						}
468 
469 					cpus_left &= ~(1ull << lo);
470 				} while (cpus_left);
471 				continue;
472 			}
473 		}
474 #endif
475 #ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
476 		if (curbusy) {
477 			busy++;
478 			continue;
479 		}
480 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
481 #ifdef SEND_MONDO_STATS
482 		{
			/*
			 * NOTE(review): the 64-bit tick difference is
			 * stored in an int; if it truncates to a negative
			 * value the n < 8192 test passes and the array
			 * index is negative -- confirm and consider a
			 * uint64_t here.
			 */
483 			int n = gettick() - starttick;
484 			if (n < 8192)
485 				x_nack_stimes[n >> 7]++;
486 		}
487 #endif
		/*
		 * Spin for sys_clock_mhz ticks before retrying
		 * (presumably about one microsecond -- confirm).
		 */
488 		while (gettick() < (tick + sys_clock_mhz))
489 			;
		/* Re-ship to every index whose NACK bit is set. */
490 		do {
491 			lo = lowbit(curnack) - 1;
492 			i = IDSR_NACK_IDX(lo);
493 			shipit(cpuids[i], i);
494 			curnack &= ~(1ull << lo);
495 		} while (curnack);
496 		nack++;
497 		busy = 0;
498 	}
499 #ifdef SEND_MONDO_STATS
500 	{
		/* NOTE(review): same int truncation concern as above. */
501 		int n = gettick() - starttick;
502 		if (n < 8192)
503 			x_set_stimes[n >> 7]++;
504 		else
505 			x_set_ltimes[(n >> 13) & 0xf]++;
506 	}
507 	x_set_cpus[shipped]++;
508 #endif
509 }
510 
511 /*
512  * Cpu private initialization.
513  */
514 void
515 cpu_init_private(struct cpu *cp)
516 {
517 	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
518 	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
519 		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
520 		    "supported", cp->cpu_id,
521 		    cpunodes[cp->cpu_id].implementation);
522 	}
523 
524 	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
525 }
526 
527 void
528 cpu_setup(void)
529 {
530 	extern int at_flags;
531 	extern int cpc_has_overflow_intr;
532 	uint64_t cpu0_log;
533 	extern	 uint64_t opl_cpu0_err_log;
534 
535 	/*
536 	 * Initialize Error log Scratch register for error handling.
537 	 */
538 
539 	cpu0_log = va_to_pa(&opl_cpu0_err_log);
540 	opl_error_setup(cpu0_log);
541 
542 	/*
543 	 * Enable MMU translating multiple page sizes for
544 	 * sITLB and sDTLB.
545 	 */
546 	opl_mpg_enable();
547 
548 	/*
549 	 * Setup chip-specific trap handlers.
550 	 */
551 	cpu_init_trap();
552 
553 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
554 
555 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
556 
557 	/*
558 	 * Due to the number of entries in the fully-associative tlb
559 	 * this may have to be tuned lower than in spitfire.
560 	 */
561 	pp_slots = MIN(8, MAXPP_SLOTS);
562 
563 	/*
564 	 * Block stores do not invalidate all pages of the d$, pagecopy
565 	 * et. al. need virtual translations with virtual coloring taken
566 	 * into consideration.  prefetch/ldd will pollute the d$ on the
567 	 * load side.
568 	 */
569 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
570 
571 	if (use_page_coloring) {
572 		do_pg_coloring = 1;
573 	}
574 
575 	isa_list =
576 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
577 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
578 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
579 
580 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
581 	    AV_SPARC_POPC | AV_SPARC_FMAF;
582 
583 	/*
584 	 * On SPARC64-VI, there's no hole in the virtual address space
585 	 */
586 	hole_start = hole_end = 0;
587 
588 	/*
589 	 * The kpm mapping window.
590 	 * kpm_size:
591 	 *	The size of a single kpm range.
592 	 *	The overall size will be: kpm_size * vac_colors.
593 	 * kpm_vbase:
594 	 *	The virtual start address of the kpm range within the kernel
595 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
596 	 */
597 	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
598 	kpm_size_shift = 47;
599 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
600 	kpm_smallpages = 1;
601 
602 	/*
603 	 * The traptrace code uses either %tick or %stick for
604 	 * timestamping.  We have %stick so we can use it.
605 	 */
606 	traptrace_use_stick = 1;
607 
608 	/*
609 	 * SPARC64-VI has a performance counter overflow interrupt
610 	 */
611 	cpc_has_overflow_intr = 1;
612 
613 	/*
614 	 * Declare that this architecture/cpu combination does not support
615 	 * fpRAS.
616 	 */
617 	fpras_implemented = 0;
618 }
619 
620 /*
621  * Called by setcpudelay
622  */
623 void
624 cpu_init_tick_freq(void)
625 {
626 	/*
627 	 * For SPARC64-VI we want to use the system clock rate as
628 	 * the basis for low level timing, due to support of mixed
629 	 * speed CPUs and power managment.
630 	 */
631 	if (system_clock_freq == 0)
632 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
633 
634 	sys_tick_freq = system_clock_freq;
635 }
636 
637 #ifdef SEND_MONDO_STATS
638 uint32_t x_one_stimes[64];
639 uint32_t x_one_ltimes[16];
640 uint32_t x_set_stimes[64];
641 uint32_t x_set_ltimes[16];
642 uint32_t x_set_cpus[NCPU];
643 uint32_t x_nack_stimes[64];
644 #endif
645 
646 /*
647  * Note: A version of this function is used by the debugger via the KDI,
648  * and must be kept in sync with this version.  Any changes made to this
649  * function to support new chips or to accomodate errata must also be included
650  * in the KDI-specific version.  See us3_kdi.c.
651  */
652 void
653 send_one_mondo(int cpuid)
654 {
655 	int busy, nack;
656 	uint64_t idsr, starttick, endtick, tick, lasttick;
657 	uint64_t busymask;
658 
659 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
660 	starttick = lasttick = gettick();
661 	shipit(cpuid, 0);
662 	endtick = starttick + xc_tick_limit;
663 	busy = nack = 0;
664 	busymask = IDSR_BUSY;
665 	for (;;) {
666 		idsr = getidsr();
667 		if (idsr == 0)
668 			break;
669 
670 		tick = gettick();
671 		/*
672 		 * If there is a big jump between the current tick
673 		 * count and lasttick, we have probably hit a break
674 		 * point.  Adjust endtick accordingly to avoid panic.
675 		 */
676 		if (tick > (lasttick + xc_tick_jump_limit))
677 			endtick += (tick - lasttick);
678 		lasttick = tick;
679 		if (tick > endtick) {
680 			if (panic_quiesce)
681 				return;
682 			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
683 			    "[%d NACK %d BUSY]", cpuid, nack, busy);
684 		}
685 
686 		if (idsr & busymask) {
687 			busy++;
688 			continue;
689 		}
690 		drv_usecwait(1);
691 		shipit(cpuid, 0);
692 		nack++;
693 		busy = 0;
694 	}
695 #ifdef SEND_MONDO_STATS
696 	{
697 		int n = gettick() - starttick;
698 		if (n < 8192)
699 			x_one_stimes[n >> 7]++;
700 		else
701 			x_one_ltimes[(n >> 13) & 0xf]++;
702 	}
703 #endif
704 }
705 
706 /*
707  * init_mmu_page_sizes is set to one after the bootup time initialization
708  * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
709  * valid value.
710  *
711  * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
712  * versions of disable_ism_large_pages and disable_large_pages, and feed back
713  * into those two hat variables at hat initialization time.
714  *
715  */
716 int init_mmu_page_sizes = 0;
717 
718 static uint_t mmu_disable_large_pages = 0;
719 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
720 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
721 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
722 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
723 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
724 	(1 << TTE512K));
725 
726 /*
727  * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
728  * Called during very early bootup from check_cpus_set().
729  * Can be called to verify that mmu_page_sizes are set up correctly.
730  *
731  * Set Olympus defaults. We do not use the function parameter.
732  */
733 /*ARGSUSED*/
734 int
735 mmu_init_mmu_page_sizes(int32_t not_used)
736 {
737 	if (!init_mmu_page_sizes) {
738 		mmu_page_sizes = MMU_PAGE_SIZES;
739 		mmu_hashcnt = MAX_HASHCNT;
740 		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
741 		mmu_exported_pagesize_mask = (1 << TTE8K) |
742 		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
743 		    (1 << TTE32M) | (1 << TTE256M);
744 		init_mmu_page_sizes = 1;
745 		return (0);
746 	}
747 	return (1);
748 }
749 
750 /* SPARC64-VI worst case DTLB parameters */
751 #ifndef	LOCKED_DTLB_ENTRIES
752 #define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
753 #endif
754 #define	TOTAL_DTLB_ENTRIES	32
755 #define	AVAIL_32M_ENTRIES	0
756 #define	AVAIL_256M_ENTRIES	0
757 #define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
758 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
759 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
760 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
761 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
762 
763 /*
764  * The function returns the mmu-specific values for the
765  * hat's disable_large_pages, disable_ism_large_pages, and
766  * disable_auto_data_large_pages and
767  * disable_text_data_large_pages variables.
768  */
769 uint_t
770 mmu_large_pages_disabled(uint_t flag)
771 {
772 	uint_t pages_disable = 0;
773 	extern int use_text_pgsz64K;
774 	extern int use_text_pgsz512K;
775 
776 	if (flag == HAT_LOAD) {
777 		pages_disable =  mmu_disable_large_pages;
778 	} else if (flag == HAT_LOAD_SHARE) {
779 		pages_disable = mmu_disable_ism_large_pages;
780 	} else if (flag == HAT_AUTO_DATA) {
781 		pages_disable = mmu_disable_auto_data_large_pages;
782 	} else if (flag == HAT_AUTO_TEXT) {
783 		pages_disable = mmu_disable_auto_text_large_pages;
784 		if (use_text_pgsz512K) {
785 			pages_disable &= ~(1 << TTE512K);
786 		}
787 		if (use_text_pgsz64K) {
788 			pages_disable &= ~(1 << TTE64K);
789 		}
790 	}
791 	return (pages_disable);
792 }
793 
794 /*
795  * mmu_init_large_pages is called with the desired ism_pagesize parameter.
796  * It may be called from set_platform_defaults, if some value other than 32M
797  * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
798  * then only warn, since it would be bad form to panic due to a user typo.
799  *
800  * The function re-initializes the mmu_disable_ism_large_pages variable.
801  */
802 void
803 mmu_init_large_pages(size_t ism_pagesize)
804 {
805 	switch (ism_pagesize) {
806 	case MMU_PAGESIZE4M:
807 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
808 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
809 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
810 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
811 		break;
812 	case MMU_PAGESIZE32M:
813 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
814 		    (1 << TTE512K) | (1 << TTE256M));
815 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
816 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
817 		adjust_data_maxlpsize(ism_pagesize);
818 		break;
819 	case MMU_PAGESIZE256M:
820 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
821 		    (1 << TTE512K) | (1 << TTE32M));
822 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
823 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
824 		adjust_data_maxlpsize(ism_pagesize);
825 		break;
826 	default:
827 		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
828 		    ism_pagesize);
829 		break;
830 	}
831 }
832 
833 /*
834  * Function to reprogram the TLBs when page sizes used
835  * by a process change significantly.
836  */
837 void
838 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
839 {
840 	uint8_t pgsz0, pgsz1;
841 
842 	/*
843 	 * Don't program 2nd dtlb for kernel and ism hat
844 	 */
845 	ASSERT(hat->sfmmu_ismhat == NULL);
846 	ASSERT(hat != ksfmmup);
847 
848 	/*
849 	 * hat->sfmmu_pgsz[] is an array whose elements
850 	 * contain a sorted order of page sizes.  Element
851 	 * 0 is the most commonly used page size, followed
852 	 * by element 1, and so on.
853 	 *
854 	 * ttecnt[] is an array of per-page-size page counts
855 	 * mapped into the process.
856 	 *
857 	 * If the HAT's choice for page sizes is unsuitable,
858 	 * we can override it here.  The new values written
859 	 * to the array will be handed back to us later to
860 	 * do the actual programming of the TLB hardware.
861 	 *
862 	 */
863 	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
864 	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);
865 
866 	/*
867 	 * This implements PAGESIZE programming of the sTLB
868 	 * if large TTE counts don't exceed the thresholds.
869 	 */
870 	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
871 		pgsz0 = page_szc(MMU_PAGESIZE);
872 	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
873 		pgsz1 = page_szc(MMU_PAGESIZE);
874 	tmp_pgsz[0] = pgsz0;
875 	tmp_pgsz[1] = pgsz1;
876 	/* otherwise, accept what the HAT chose for us */
877 }
878 
879 /*
880  * The HAT calls this function when an MMU context is allocated so that we
881  * can reprogram the large TLBs appropriately for the new process using
882  * the context.
883  *
884  * The caller must hold the HAT lock.
885  */
886 void
887 mmu_set_ctx_page_sizes(struct hat *hat)
888 {
889 	uint8_t pgsz0, pgsz1;
890 	uint8_t new_cext;
891 
892 	ASSERT(sfmmu_hat_lock_held(hat));
893 	/*
894 	 * Don't program 2nd dtlb for kernel and ism hat
895 	 */
896 	if (hat->sfmmu_ismhat || hat == ksfmmup)
897 		return;
898 
899 	/*
900 	 * If supported, reprogram the TLBs to a larger pagesize.
901 	 */
902 	pgsz0 = hat->sfmmu_pgsz[0];
903 	pgsz1 = hat->sfmmu_pgsz[1];
904 	ASSERT(pgsz0 < mmu_page_sizes);
905 	ASSERT(pgsz1 < mmu_page_sizes);
906 	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
907 	if (hat->sfmmu_cext != new_cext) {
908 #ifdef DEBUG
909 		int i;
910 		/*
911 		 * assert cnum should be invalid, this is because pagesize
912 		 * can only be changed after a proc's ctxs are invalidated.
913 		 */
914 		for (i = 0; i < max_mmu_ctxdoms; i++) {
915 			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
916 		}
917 #endif /* DEBUG */
918 		hat->sfmmu_cext = new_cext;
919 	}
920 	/*
921 	 * sfmmu_setctx_sec() will take care of the
922 	 * rest of the dirty work for us.
923 	 */
924 }
925 
926 /*
927  * This function assumes that there are either four or six supported page
928  * sizes and at most two programmable TLBs, so we need to decide which
929  * page sizes are most important and then adjust the TLB page sizes
930  * accordingly (if supported).
931  *
932  * If these assumptions change, this function will need to be
933  * updated to support whatever the new limits are.
934  */
935 void
936 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
937 {
938 	uint64_t sortcnt[MMU_PAGE_SIZES];
939 	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
940 	uint8_t i, j, max;
941 	uint16_t oldval, newval;
942 
943 	/*
944 	 * We only consider reprogramming the TLBs if one or more of
945 	 * the two most used page sizes changes and we're using
946 	 * large pages in this process.
947 	 */
948 	if (SFMMU_LGPGS_INUSE(sfmmup)) {
949 		/* Sort page sizes. */
950 		for (i = 0; i < mmu_page_sizes; i++) {
951 			sortcnt[i] = ttecnt[i];
952 		}
953 		for (j = 0; j < mmu_page_sizes; j++) {
954 			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
955 				if (sortcnt[i] > sortcnt[max])
956 					max = i;
957 			}
958 			tmp_pgsz[j] = max;
959 			sortcnt[max] = 0;
960 		}
961 
962 		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];
963 
964 		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);
965 
966 		/* Check 2 largest values after the sort. */
967 		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
968 		if (newval != oldval) {
969 			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
970 		}
971 	}
972 }
973 
974 /*
975  * Return processor specific async error structure
976  * size used.
977  */
978 int
979 cpu_aflt_size(void)
980 {
981 	return (sizeof (opl_async_flt_t));
982 }
983 
984 /*
985  * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
986  * post-process CPU events that are dequeued.  As such, it can be invoked
987  * from softint context, from AST processing in the trap() flow, or from the
988  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
989  * Historically this entry point was used to log the actual cmn_err(9F) text;
990  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
991  * With FMA this function now also returns a flag which indicates to the
992  * caller whether the ereport should be posted (1) or suppressed (0).
993  */
994 /*ARGSUSED*/
995 static int
996 cpu_sync_log_err(void *flt)
997 {
998 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
999 	struct async_flt *aflt = (struct async_flt *)flt;
1000 
1001 	/*
1002 	 * No extra processing of urgent error events.
1003 	 * Always generate ereports for these events.
1004 	 */
1005 	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
1006 		return (1);
1007 
1008 	/*
1009 	 * Additional processing for synchronous errors.
1010 	 */
1011 	switch (opl_flt->flt_type) {
1012 	case OPL_CPU_INV_SFSR:
1013 		return (1);
1014 
1015 	case OPL_CPU_SYNC_UE:
1016 		/*
1017 		 * The validity: SFSR_MK_UE bit has been checked
1018 		 * in opl_cpu_sync_error()
1019 		 * No more check is required.
1020 		 *
1021 		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
1022 		 * and they have been retrieved in cpu_queue_events()
1023 		 */
1024 
1025 		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
1026 			ASSERT(aflt->flt_in_memory);
1027 			/*
1028 			 * We want to skip logging only if ALL the following
1029 			 * conditions are true:
1030 			 *
1031 			 *	1. We are not panicing already.
1032 			 *	2. The error is a memory error.
1033 			 *	3. There is only one error.
1034 			 *	4. The error is on a retired page.
1035 			 *	5. The error occurred under on_trap
1036 			 *	protection AFLT_PROT_EC
1037 			 */
1038 			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
1039 			    page_retire_check(aflt->flt_addr, NULL) == 0) {
1040 				/*
1041 				 * Do not log an error from
1042 				 * the retired page
1043 				 */
1044 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
1045 				return (0);
1046 			}
1047 			if (!panicstr)
1048 				cpu_page_retire(opl_flt);
1049 		}
1050 		return (1);
1051 
1052 	case OPL_CPU_SYNC_OTHERS:
1053 		/*
1054 		 * For the following error cases, the processor HW does
1055 		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
1056 		 * to assign appropriate values here to reflect what we
1057 		 * think is the most likely cause of the problem w.r.t to
1058 		 * the particular error event.  For Buserr and timeout
1059 		 * error event, we will assign OPL_ERRID_CHANNEL as the
1060 		 * most likely reason.  For TLB parity or multiple hit
1061 		 * error events, we will assign the reason as
1062 		 * OPL_ERRID_CPU (cpu related problem) and set the
1063 		 * flt_eid_sid to point to the cpuid.
1064 		 */
1065 
1066 		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
1067 			/*
1068 			 * flt_eid_sid will not be used for this case.
1069 			 */
1070 			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
1071 		}
1072 		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
1073 			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1074 			opl_flt->flt_eid_sid = aflt->flt_inst;
1075 		}
1076 
1077 		/*
1078 		 * In case of no effective error bit
1079 		 */
1080 		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
1081 			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1082 			opl_flt->flt_eid_sid = aflt->flt_inst;
1083 		}
1084 		break;
1085 
1086 		default:
1087 			return (1);
1088 	}
1089 	return (1);
1090 }
1091 
1092 /*
1093  * Retire the bad page that may contain the flushed error.
1094  */
1095 void
1096 cpu_page_retire(opl_async_flt_t *opl_flt)
1097 {
1098 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1099 	(void) page_retire(aflt->flt_addr, PR_UE);
1100 }
1101 
1102 /*
1103  * Invoked by error_init() early in startup and therefore before
1104  * startup_errorq() is called to drain any error Q -
1105  *
1106  * startup()
1107  *   startup_end()
1108  *     error_init()
1109  *       cpu_error_init()
1110  * errorq_init()
1111  *   errorq_drain()
1112  * start_other_cpus()
1113  *
1114  * The purpose of this routine is to create error-related taskqs.  Taskqs
1115  * are used for this purpose because cpu_lock can't be grabbed from interrupt
1116  * context.
1117  *
1118  */
1119 /*ARGSUSED*/
1120 void
1121 cpu_error_init(int items)
1122 {
1123 	opl_err_log = (opl_errlog_t *)
1124 	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
1125 	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
1126 		cmn_err(CE_PANIC, "The base address of the error log "
1127 		    "is not page aligned");
1128 }
1129 
1130 /*
1131  * We route all errors through a single switch statement.
1132  */
1133 void
1134 cpu_ue_log_err(struct async_flt *aflt)
1135 {
1136 	switch (aflt->flt_class) {
1137 	case CPU_FAULT:
1138 		if (cpu_sync_log_err(aflt))
1139 			cpu_ereport_post(aflt);
1140 		break;
1141 
1142 	case BUS_FAULT:
1143 		bus_async_log_err(aflt);
1144 		break;
1145 
1146 	default:
1147 		cmn_err(CE_WARN, "discarding async error %p with invalid "
1148 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1149 		return;
1150 	}
1151 }
1152 
/*
 * Routine for panic hook callback from panic_idle().
 *
 * Intentionally empty: this CPU module has nothing to do at that point.
 */
void
cpu_async_panic_callb(void)
{
}
1162 
1163 /*
1164  * Routine to return a string identifying the physical name
1165  * associated with a memory/cache error.
1166  */
1167 /*ARGSUSED*/
1168 int
1169 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
1170     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
1171     ushort_t flt_status, char *buf, int buflen, int *lenp)
1172 {
1173 	int synd_code;
1174 	int ret;
1175 
1176 	/*
1177 	 * An AFSR of -1 defaults to a memory syndrome.
1178 	 */
1179 	synd_code = (int)flt_synd;
1180 
1181 	if (&plat_get_mem_unum) {
1182 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
1183 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
1184 			buf[0] = '\0';
1185 			*lenp = 0;
1186 		}
1187 		return (ret);
1188 	}
1189 	buf[0] = '\0';
1190 	*lenp = 0;
1191 	return (ENOTSUP);
1192 }
1193 
1194 /*
1195  * Wrapper for cpu_get_mem_unum() routine that takes an
1196  * async_flt struct rather than explicit arguments.
1197  */
1198 int
1199 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1200     char *buf, int buflen, int *lenp)
1201 {
1202 	/*
1203 	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
1204 	 * memory error.
1205 	 */
1206 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
1207 	    (uint64_t)-1,
1208 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
1209 	    aflt->flt_status, buf, buflen, lenp));
1210 }
1211 
1212 /*
1213  * This routine is a more generic interface to cpu_get_mem_unum()
1214  * that may be used by other modules (e.g. mm).
1215  */
1216 /*ARGSUSED*/
1217 int
1218 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1219     char *buf, int buflen, int *lenp)
1220 {
1221 	int synd_status, flt_in_memory, ret;
1222 	ushort_t flt_status = 0;
1223 	char unum[UNUM_NAMLEN];
1224 
1225 	/*
1226 	 * Check for an invalid address.
1227 	 */
1228 	if (afar == (uint64_t)-1)
1229 		return (ENXIO);
1230 
1231 	if (synd == (uint64_t)-1)
1232 		synd_status = AFLT_STAT_INVALID;
1233 	else
1234 		synd_status = AFLT_STAT_VALID;
1235 
1236 	flt_in_memory = (*afsr & SFSR_MEMORY) &&
1237 	    pf_is_memory(afar >> MMU_PAGESHIFT);
1238 
1239 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1240 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
1241 	if (ret != 0)
1242 		return (ret);
1243 
1244 	if (*lenp >= buflen)
1245 		return (ENAMETOOLONG);
1246 
1247 	(void) strncpy(buf, unum, buflen);
1248 
1249 	return (0);
1250 }
1251 
1252 /*
1253  * Routine to return memory information associated
1254  * with a physical address and syndrome.
1255  */
1256 /*ARGSUSED*/
1257 int
1258 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1259     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1260     int *segsp, int *banksp, int *mcidp)
1261 {
1262 	int synd_code = (int)synd;
1263 
1264 	if (afar == (uint64_t)-1)
1265 		return (ENXIO);
1266 
1267 	if (p2get_mem_info != NULL)
1268 		return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep,
1269 		    bank_sizep, segsp, banksp, mcidp));
1270 	else
1271 		return (ENOTSUP);
1272 }
1273 
1274 /*
1275  * Routine to return a string identifying the physical
1276  * name associated with a cpuid.
1277  */
1278 int
1279 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1280 {
1281 	int ret;
1282 	char unum[UNUM_NAMLEN];
1283 
1284 	if (&plat_get_cpu_unum) {
1285 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN,
1286 		    lenp)) != 0)
1287 			return (ret);
1288 	} else {
1289 		return (ENOTSUP);
1290 	}
1291 
1292 	if (*lenp >= buflen)
1293 		return (ENAMETOOLONG);
1294 
1295 	(void) strncpy(buf, unum, *lenp);
1296 
1297 	return (0);
1298 }
1299 
1300 /*
1301  * This routine exports the name buffer size.
1302  */
1303 size_t
1304 cpu_get_name_bufsize()
1305 {
1306 	return (UNUM_NAMLEN);
1307 }
1308 
/*
 * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
 *
 * The work is done by flush_ecache(), which is given ecache_flushaddr
 * together with the ecache size and line size recorded in the cpunodes[]
 * entry indexed by the current CPU's id.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}
1318 
1319 static uint8_t
1320 flt_to_trap_type(struct async_flt *aflt)
1321 {
1322 	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
1323 		return (TRAP_TYPE_ECC_I);
1324 	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
1325 		return (TRAP_TYPE_ECC_D);
1326 	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
1327 		return (TRAP_TYPE_URGENT);
1328 	return (TRAP_TYPE_UNKNOWN);
1329 }
1330 
1331 /*
1332  * Encode the data saved in the opl_async_flt_t struct into
1333  * the FM ereport payload.
1334  */
1335 /* ARGSUSED */
1336 static void
1337 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
1338 		nvlist_t *resource)
1339 {
1340 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
1341 	char unum[UNUM_NAMLEN];
1342 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1343 	int len;
1344 
1345 
1346 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
1347 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
1348 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1349 	}
1350 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
1351 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
1352 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
1353 	}
1354 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
1355 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
1356 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1357 	}
1358 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
1359 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
1360 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
1361 	}
1362 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
1363 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
1364 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
1365 	}
1366 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
1367 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
1368 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
1369 	}
1370 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
1371 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
1372 		    DATA_TYPE_BOOLEAN_VALUE,
1373 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
1374 	}
1375 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
1376 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
1377 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
1378 	}
1379 
1380 	switch (opl_flt->flt_eid_mod) {
1381 	case OPL_ERRID_CPU:
1382 		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1383 		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
1384 		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
1385 		    NULL, opl_flt->flt_eid_sid,
1386 		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf);
1387 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1388 		    DATA_TYPE_NVLIST, resource, NULL);
1389 		break;
1390 
1391 	case OPL_ERRID_CHANNEL:
1392 		/*
1393 		 * No resource is created but the cpumem DE will find
1394 		 * the defective path by retreiving EID from SFSR which is
1395 		 * included in the payload.
1396 		 */
1397 		break;
1398 
1399 	case OPL_ERRID_MEM:
1400 		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
1401 		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
1402 		    unum, NULL, (uint64_t)-1);
1403 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1404 		    DATA_TYPE_NVLIST, resource, NULL);
1405 		break;
1406 
1407 	case OPL_ERRID_PATH:
1408 		/*
1409 		 * No resource is created but the cpumem DE will find
1410 		 * the defective path by retreiving EID from SFSR which is
1411 		 * included in the payload.
1412 		 */
1413 		break;
1414 	}
1415 }
1416 
1417 /*
1418  * Returns whether fault address is valid for this error bit and
1419  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
1420  */
1421 /*ARGSUSED*/
1422 static int
1423 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
1424 {
1425 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1426 
1427 	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
1428 		return ((t_afsr_bit & SFSR_MEMORY) &&
1429 		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
1430 	}
1431 	return (0);
1432 }
1433 
/*
 * In OPL SCF does the stick synchronization.
 *
 * Nothing is done here, so this routine is intentionally empty.
 */
void
sticksync_slave(void)
{
}
1441 
/*
 * In OPL SCF does the stick synchronization.
 *
 * Nothing is done here, so this routine is intentionally empty.
 */
void
sticksync_master(void)
{
}
1449 
/*
 * Cpu private uninitialization.  OPL cpus do not use the private area,
 * so the only work done here is the cmp_delete_cpu() call for the given
 * cpu's id.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	cmp_delete_cpu(cp->cpu_id);
}
1458 
/*
 * Always flush an entire cache.
 *
 * This is a plain call to cpu_flush_ecache(); no partial flush is
 * attempted.
 */
void
cpu_error_ecache_flush(void)
{
	cpu_flush_ecache();
}
1467 
1468 void
1469 cpu_ereport_post(struct async_flt *aflt)
1470 {
1471 	char *cpu_type, buf[FM_MAX_CLASS];
1472 	nv_alloc_t *nva = NULL;
1473 	nvlist_t *ereport, *detector, *resource;
1474 	errorq_elem_t *eqep;
1475 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1476 
1477 	if (aflt->flt_panic || panicstr) {
1478 		eqep = errorq_reserve(ereport_errorq);
1479 		if (eqep == NULL)
1480 			return;
1481 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
1482 		nva = errorq_elem_nva(ereport_errorq, eqep);
1483 	} else {
1484 		ereport = fm_nvlist_create(nva);
1485 	}
1486 
1487 	/*
1488 	 * Create the scheme "cpu" FMRI.
1489 	 */
1490 	detector = fm_nvlist_create(nva);
1491 	resource = fm_nvlist_create(nva);
1492 	switch (cpunodes[aflt->flt_inst].implementation) {
1493 	case OLYMPUS_C_IMPL:
1494 		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
1495 		break;
1496 	case JUPITER_IMPL:
1497 		cpu_type = FM_EREPORT_CPU_SPARC64_VII;
1498 		break;
1499 	default:
1500 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
1501 		break;
1502 	}
1503 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1504 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
1505 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
1506 	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
1507 	    sbuf);
1508 
1509 	/*
1510 	 * Encode all the common data into the ereport.
1511 	 */
1512 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
1513 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
1514 
1515 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
1516 	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
1517 
1518 	/*
1519 	 * Encode the error specific data that was saved in
1520 	 * the async_flt structure into the ereport.
1521 	 */
1522 	cpu_payload_add_aflt(aflt, ereport, resource);
1523 
1524 	if (aflt->flt_panic || panicstr) {
1525 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1526 	} else {
1527 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1528 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1529 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1530 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1531 	}
1532 }
1533 
1534 void
1535 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
1536 {
1537 	int status;
1538 	ddi_fm_error_t de;
1539 
1540 	bzero(&de, sizeof (ddi_fm_error_t));
1541 
1542 	de.fme_version = DDI_FME_VERSION;
1543 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
1544 	de.fme_flag = expected;
1545 	de.fme_bus_specific = (void *)aflt->flt_addr;
1546 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
1547 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
1548 		aflt->flt_panic = 1;
1549 }
1550 
1551 void
1552 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
1553     errorq_t *eqp, uint_t flag)
1554 {
1555 	struct async_flt *aflt = (struct async_flt *)payload;
1556 
1557 	aflt->flt_erpt_class = error_class;
1558 	errorq_dispatch(eqp, payload, payload_sz, flag);
1559 }
1560 
/*
 * Establish or adjust the hw_copy_limit_{1,2,4,8} thresholds.
 *
 * On the first call (min_ecache_size is still 0 and use_hw_bcopy is set)
 * every limit that is still 0 gets its default, a private copy of the
 * value is remembered in priv_hcl_*, and ecache_size is recorded as
 * min_ecache_size.  On later calls only hw_copy_limit_8 is revisited, and
 * only while it still equals the private copy, i.e. has not been changed
 * by anyone else.
 */
void
adjust_hw_copy_limits(int ecache_size)
{
	/*
	 * Set hw copy limits.
	 *
	 * /etc/system will be parsed later and can override one or more
	 * of these settings.
	 *
	 * At this time, ecache size seems only mildly relevant.
	 * We seem to run into issues with the d-cache and stalls
	 * we see on misses.
	 *
	 * Cycle measurement indicates that 2 byte aligned copies fare
	 * little better than doing things with VIS at around 512 bytes.
	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
	 * aligned is faster whenever the source and destination data
	 * in cache and the total size is less than 2 Kbytes.  The 2K
	 * limit seems to be driven by the 2K write cache.
	 * When more than 2K of copies are done in non-VIS mode, stores
	 * backup in the write cache.  In VIS mode, the write cache is
	 * bypassed, allowing faster cache-line writes aligned on cache
	 * boundaries.
	 *
	 * In addition, in non-VIS mode, there is no prefetching, so
	 * for larger copies, the advantage of prefetching to avoid even
	 * occasional cache misses is enough to justify using the VIS code.
	 *
	 * During testing, it was discovered that netbench ran 3% slower
	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
	 * applications, data is only used once (copied to the output
	 * buffer, then copied by the network device off the system).  Using
	 * the VIS copy saves more L2 cache state.  Network copies are
	 * around 1.3K to 1.5K in size for historical reasons.
	 *
	 * Therefore, a limit of 1K bytes will be used for the 8 byte
	 * aligned copy even for large caches and 8 MB ecache.  The
	 * infrastructure to allow different limits for different sized
	 * caches is kept to allow further tuning in later releases.
	 */

	if (min_ecache_size == 0 && use_hw_bcopy) {
		/*
		 * First time through - should be before /etc/system
		 * is read.
		 * Could skip the checks for zero but this lets us
		 * preserve any debugger rewrites.
		 */
		if (hw_copy_limit_1 == 0) {
			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
			priv_hcl_1 = hw_copy_limit_1;
		}
		if (hw_copy_limit_2 == 0) {
			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
			priv_hcl_2 = hw_copy_limit_2;
		}
		if (hw_copy_limit_4 == 0) {
			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_4 = hw_copy_limit_4;
		}
		if (hw_copy_limit_8 == 0) {
			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
			priv_hcl_8 = hw_copy_limit_8;
		}
		min_ecache_size = ecache_size;
	} else {
		/*
		 * MP initialization. Called *after* /etc/system has
		 * been parsed. One CPU has already been initialized.
		 * Need to cater for /etc/system having scragged one
		 * of our values.
		 */
		if (ecache_size == min_ecache_size) {
			/*
			 * Same size ecache. We do nothing unless we
			 * have a pessimistic ecache setting. In that
			 * case we become more optimistic (if the cache is
			 * large enough).
			 */
			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
				/*
				 * Need to adjust hw_copy_limit* from our
				 * pessimistic uniprocessor value to a more
				 * optimistic UP value *iff* it hasn't been
				 * reset.
				 *
				 * Every size bracket below currently assigns
				 * the same value; the ladder is retained as
				 * the tuning infrastructure described at the
				 * top of this function.
				 */
				if ((ecache_size > 1048576) &&
				    (priv_hcl_8 == hw_copy_limit_8)) {
					if (ecache_size <= 2097152)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else if (ecache_size <= 4194304)
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					else
						hw_copy_limit_8 = 4 *
						    VIS_COPY_THRESHOLD;
					priv_hcl_8 = hw_copy_limit_8;
				}
			}
		} else if (ecache_size < min_ecache_size) {
			/*
			 * A different ecache size. Can this even happen?
			 */
			if (priv_hcl_8 == hw_copy_limit_8) {
				/*
				 * The previous value that we set
				 * is unchanged (i.e., it hasn't been
				 * scragged by /etc/system). Rewrite it.
				 * The smaller ecache size also becomes the
				 * new min_ecache_size.
				 */
				if (ecache_size <= 1048576)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 2097152)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else if (ecache_size <= 4194304)
					hw_copy_limit_8 = 8 *
					    VIS_COPY_THRESHOLD;
				else
					hw_copy_limit_8 = 10 *
					    VIS_COPY_THRESHOLD;
				priv_hcl_8 = hw_copy_limit_8;
				min_ecache_size = ecache_size;
			}
		}
	}
}
1689 
1690 #define	VIS_BLOCKSIZE		64
1691 
1692 int
1693 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1694 {
1695 	int ret, watched;
1696 
1697 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1698 	ret = dtrace_blksuword32(addr, data, 0);
1699 	if (watched)
1700 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1701 
1702 	return (ret);
1703 }
1704 
1705 void
1706 opl_cpu_reg_init()
1707 {
1708 	uint64_t	this_cpu_log;
1709 
1710 	/*
1711 	 * We do not need to re-initialize cpu0 registers.
1712 	 */
1713 	if (cpu[getprocessorid()] == &cpu0)
1714 		return;
1715 
1716 	/*
1717 	 * Initialize Error log Scratch register for error handling.
1718 	 */
1719 
1720 	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
1721 	    ERRLOG_BUFSZ * (getprocessorid())));
1722 	opl_error_setup(this_cpu_log);
1723 
1724 	/*
1725 	 * Enable MMU translating multiple page sizes for
1726 	 * sITLB and sDTLB.
1727 	 */
1728 	opl_mpg_enable();
1729 }
1730 
1731 /*
1732  * Queue one event in ue_queue based on ecc_type_to_info entry.
1733  */
1734 static void
1735 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
1736     ecc_type_to_info_t *eccp)
1737 {
1738 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1739 
1740 	if (reason &&
1741 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
1742 		(void) strcat(reason, eccp->ec_reason);
1743 	}
1744 
1745 	opl_flt->flt_bit = eccp->ec_afsr_bit;
1746 	opl_flt->flt_type = eccp->ec_flt_type;
1747 	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
1748 	aflt->flt_payload = eccp->ec_err_payload;
1749 
1750 	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
1751 	cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt,
1752 	    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
1753 }
1754 
1755 /*
1756  * Queue events on async event queue one event per error bit.
1757  * Return number of events queued.
1758  */
1759 int
1760 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
1761 {
1762 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1763 	ecc_type_to_info_t *eccp;
1764 	int nevents = 0;
1765 
1766 	/*
1767 	 * Queue expected errors, error bit and fault type must must match
1768 	 * in the ecc_type_to_info table.
1769 	 */
1770 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
1771 	    eccp++) {
1772 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
1773 		    (eccp->ec_flags & aflt->flt_status) != 0) {
1774 			/*
1775 			 * UE error event can be further
1776 			 * classified/breakdown into finer granularity
1777 			 * based on the flt_eid_mod value set by HW.  We do
1778 			 * special handling here so that we can report UE
1779 			 * error in finer granularity as ue_mem,
1780 			 * ue_channel, ue_cpu or ue_path.
1781 			 */
1782 			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
1783 				opl_flt->flt_eid_mod = (aflt->flt_stat &
1784 				    SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT;
1785 				opl_flt->flt_eid_sid = (aflt->flt_stat &
1786 				    SFSR_EID_SID) >> SFSR_EID_SID_SHIFT;
1787 				/*
1788 				 * Need to advance eccp pointer by flt_eid_mod
1789 				 * so that we get an appropriate ecc pointer
1790 				 *
1791 				 * EID			# of advances
1792 				 * ----------------------------------
1793 				 * OPL_ERRID_MEM	0
1794 				 * OPL_ERRID_CHANNEL	1
1795 				 * OPL_ERRID_CPU	2
1796 				 * OPL_ERRID_PATH	3
1797 				 */
1798 				eccp += opl_flt->flt_eid_mod;
1799 			}
1800 			cpu_queue_one_event(opl_flt, reason, eccp);
1801 			t_afsr_errs &= ~eccp->ec_afsr_bit;
1802 			nevents++;
1803 		}
1804 	}
1805 
1806 	return (nevents);
1807 }
1808 
/*
 * Sync. error wrapper functions.
 * We use these functions in order to transfer here from the
 * nucleus trap handler information about trap type (data or
 * instruction) and trap level (0 or above 0). This way we
 * get rid of using SFSR's reserved bits.
 *
 * Each wrapper passes one trap-level value and one trap-type value from
 * the constants below to opl_cpu_sync_error().
 */

#define	OPL_SYNC_TL0	0	/* trap level 0 */
#define	OPL_SYNC_TL1	1	/* trap level above 0 */
#define	OPL_ISYNC_ERR	0	/* instruction sync. error */
#define	OPL_DSYNC_ERR	1	/* data sync. error */
1821 
1822 void
1823 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1824 {
1825 	uint64_t t_sfar = p_sfar;
1826 	uint64_t t_sfsr = p_sfsr;
1827 
1828 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1829 	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
1830 }
1831 
1832 void
1833 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1834 {
1835 	uint64_t t_sfar = p_sfar;
1836 	uint64_t t_sfsr = p_sfsr;
1837 
1838 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1839 	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
1840 }
1841 
1842 void
1843 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1844 {
1845 	uint64_t t_sfar = p_sfar;
1846 	uint64_t t_sfsr = p_sfsr;
1847 
1848 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1849 	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
1850 }
1851 
1852 void
1853 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1854 {
1855 	uint64_t t_sfar = p_sfar;
1856 	uint64_t t_sfsr = p_sfsr;
1857 
1858 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1859 	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
1860 }
1861 
/*
 * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
 * and TLB_PRT.
 * This function is designed based on cpu_deferred_error().
 *
 * rp		saved registers of the trapped context; r_pc/r_npc (and
 *		r_g1 for t_lofault) are rewritten when the error is taken
 *		under on_trap() or t_lofault protection
 * t_sfar	fault address value, stored in flt_addr
 * t_sfsr	fault status value, stored in flt_stat
 * tl		0 when trapped at TL=0, non-zero otherwise
 * derr		non-zero for a data sync. error, 0 for an instruction one
 *
 * The function builds an opl_async_flt_t on the stack, decides whether
 * the error is fatal, queues the matching events, runs the bus error
 * handlers, and then either panics or arranges for AST processing.
 */

static void
opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
    uint_t tl, uint_t derr)
{
	opl_async_flt_t opl_flt;
	struct async_flt *aflt;
	int trampolined = 0;
	char pr_reason[MAX_REASON_STRING];
	uint64_t log_sfsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * handle the specific error
	 */
	bzero(&opl_flt, sizeof (opl_async_flt_t));
	aflt = (struct async_flt *)&opl_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_stat = t_sfsr;
	aflt->flt_addr = t_sfar;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
	aflt->flt_class = (uchar_t)CPU_FAULT;
	/* Privileged if trapped at TL>0 or if TSTATE says so. */
	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
	    TSTATE_PRIV) ? 1 : 0));
	aflt->flt_tl = (uchar_t)tl;
	/*
	 * Initially fatal when taken at TL>0, when aft_testfatal is set, or
	 * for a TLB multiple-hit or TLB parity error.
	 */
	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;

	/*
	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
	 * So, clear all error bits to avoid mis-handling and force the system
	 * to panic.
	 * We skip all the procedures below down to the panic message call.
	 */
	if (!(t_sfsr & SFSR_FV)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_panic = 1;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
		fm_panic("%sErrors(s)", "invalid SFSR");
	}

	/*
	 * If either the UE or the MK bit is off, this is not a valid UE error.
	 * If it is not a valid UE error, clear the UE & MK_UE bits to prevent
	 * mis-handling below.
	 * aflt->flt_stat keeps the original bits as a reference.
	 */
	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
	    (SFSR_MK_UE|SFSR_UE)) {
		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
	}

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
				otp->ot_trap |= (ushort_t)OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
			}

			/*
			 * Not an else-branch: when both protections apply,
			 * this one overrides the flt_prot set just above.
			 */
			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				trampolined = 1;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			trampolined = 1;
		}
	}

	/*
	 * If we're in user mode or we're doing a protected copy, we either
	 * want the ASTON code below to send a signal to the user process
	 * or we want to panic if aft_panic is set.
	 *
	 * If we're in privileged mode and we're not doing a copy, then we
	 * need to check if we've trampolined.  If we haven't trampolined,
	 * we should panic.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			aflt->flt_panic |= aft_panic;
	} else if (!trampolined) {
		aflt->flt_panic = 1;
	}

	/*
	 * If we've trampolined due to a privileged TO or BERR, or if an
	 * unprivileged TO or BERR occurred, we don't want to enqueue an
	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.
	 */
	log_sfsr = t_sfsr;
	if (trampolined) {
		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	} else if (!aflt->flt_priv) {
		/*
		 * User mode, suppress messages if
		 * cpu_berr_to_verbose is not set.
		 */
		if (!cpu_berr_to_verbose)
			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
	}

	/*
	 * An "invalid SFSR" event is dispatched when there was something to
	 * log but no table entry matched, or when no error bit is set at
	 * all.
	 *
	 * NOTE(review): cpu_queue_events() is given t_sfsr rather than the
	 * filtered log_sfsr, so whenever another error bit is being logged
	 * the TO/BERR bits are still offered for queuing - confirm that this
	 * is intended given the comment above.
	 */
	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
		opl_flt.flt_type = OPL_CPU_INV_SFSR;
		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
	}

	/*
	 * The bus error handlers may mark the fault for panic; see
	 * cpu_run_bus_error_handlers().
	 */
	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
	 * be logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		if (pr_reason[0] == 0)
			strcpy(pr_reason, "invalid SFSR ");

		fm_panic("%sErrors(s)", pr_reason);
	}

	/*
	 * If we queued an error and we are going to return from the trap and
	 * the error was in user mode or inside of a copy routine, set AST flag
	 * so the queue will be drained before returning to user mode.  The
	 * AST processing will also act on our failure policy.
	 */
	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
		int pcb_flag = 0;

		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
			pcb_flag |= ASYNC_HWERR;

		if (t_sfsr & SFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_sfsr & SFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
	}
}
2063 
2064 /*ARGSUSED*/
2065 void
2066 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
2067 {
2068 	opl_async_flt_t opl_flt;
2069 	struct async_flt *aflt;
2070 	char pr_reason[MAX_REASON_STRING];
2071 
2072 	/* normalize tl */
2073 	tl = (tl >= 2 ? 1 : 0);
2074 	pr_reason[0] = '\0';
2075 
2076 	bzero(&opl_flt, sizeof (opl_async_flt_t));
2077 	aflt = (struct async_flt *)&opl_flt;
2078 	aflt->flt_id = gethrtime_waitfree();
2079 	aflt->flt_bus_id = getprocessorid();
2080 	aflt->flt_inst = CPU->cpu_id;
2081 	aflt->flt_stat = p_ugesr;
2082 	aflt->flt_pc = (caddr_t)rp->r_pc;
2083 	aflt->flt_class = (uchar_t)CPU_FAULT;
2084 	aflt->flt_tl = tl;
2085 	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
2086 	    1 : 0));
2087 	aflt->flt_status = OPL_ECC_URGENT_TRAP;
2088 	aflt->flt_panic = 1;
2089 	/*
2090 	 * HW does not set mod/sid in case of urgent error.
2091 	 * So we have to set it here.
2092 	 */
2093 	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
2094 	opl_flt.flt_eid_sid = aflt->flt_inst;
2095 
2096 	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
2097 		opl_flt.flt_type = OPL_CPU_INV_UGESR;
2098 		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
2099 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
2100 		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
2101 	}
2102 
2103 	fm_panic("Urgent Error");
2104 }
2105 
2106 /*
2107  * Initialization error counters resetting.
2108  */
2109 /* ARGSUSED */
2110 static void
2111 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
2112 {
2113 	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
2114 	hdlr->cyh_level = CY_LOW_LEVEL;
2115 	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
2116 
2117 	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
2118 	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
2119 }
2120 
2121 void
2122 cpu_mp_init(void)
2123 {
2124 	cyc_omni_handler_t hdlr;
2125 
2126 	hdlr.cyo_online = opl_ras_online;
2127 	hdlr.cyo_offline = NULL;
2128 	hdlr.cyo_arg = NULL;
2129 	mutex_enter(&cpu_lock);
2130 	(void) cyclic_add_omni(&hdlr);
2131 	mutex_exit(&cpu_lock);
2132 }
2133 
2134 int heaplp_use_stlb = 0;
2135 
2136 void
2137 mmu_init_kernel_pgsz(struct hat *hat)
2138 {
2139 	uint_t tte = page_szc(segkmem_lpsize);
2140 	uchar_t new_cext_primary, new_cext_nucleus;
2141 
2142 	if (heaplp_use_stlb == 0) {
2143 		/* do not reprogram stlb */
2144 		tte = TTE8K;
2145 	}
2146 
2147 	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
2148 	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);
2149 
2150 	hat->sfmmu_cext = new_cext_primary;
2151 	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
2152 	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
2153 }
2154 
2155 size_t
2156 mmu_get_kernel_lpsize(size_t lpsize)
2157 {
2158 	uint_t tte;
2159 
2160 	if (lpsize == 0) {
2161 		/* no setting for segkmem_lpsize in /etc/system: use default */
2162 		return (MMU_PAGESIZE4M);
2163 	}
2164 
2165 	for (tte = TTE8K; tte <= TTE4M; tte++) {
2166 		if (lpsize == TTEBYTES(tte))
2167 			return (lpsize);
2168 	}
2169 
2170 	return (TTEBYTES(TTE8K));
2171 }
2172 
2173 /*
2174  * The following are functions that are unused in
2175  * OPL cpu module. They are defined here to resolve
2176  * dependencies in the "unix" module.
2177  * Unused functions that should never be called in
2178  * OPL are coded with ASSERT(0).
2179  */
2180 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
void
cpu_disable_errors(void)
{
}
2184 
/*
 * Not expected to be called on OPL; present only to resolve a dependency
 * of the "unix" module.
 */
void
cpu_enable_errors(void)
{
	ASSERT(0);
}
2188 
2189 /*ARGSUSED*/
2190 void
2191 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
2192 { ASSERT(0); }
2193 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}
2198 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}
2203 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
2208 
2209 /*ARGSUSED*/
2210 void
2211 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
2212 { ASSERT(0); }
2213 
2214 /*ARGSUSED*/
2215 void
2216 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
2217 { ASSERT(0); }
2218 
/*
 * Not expected to be called on OPL; present only to resolve a dependency
 * of the "unix" module.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	ASSERT(0);
}
2223 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
}
2228 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
}
2233 
2234 /* ARGSUSED */
2235 void
2236 cpu_change_speed(uint64_t divisor, uint64_t arg2)
2237 { ASSERT(0); }
2238 
/*
 * Unused on OPL; present only to resolve a dependency of the "unix"
 * module.  Does nothing.
 */
void
cpu_init_cache_scrub(void)
{
}
2242 
2243 /* ARGSUSED */
2244 int
2245 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
2246 {
2247 	if (&plat_get_mem_sid) {
2248 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
2249 	} else {
2250 		return (ENOTSUP);
2251 	}
2252 }
2253 
2254 /* ARGSUSED */
2255 int
2256 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
2257 {
2258 	if (&plat_get_mem_addr) {
2259 		return (plat_get_mem_addr(unum, sid, offset, addrp));
2260 	} else {
2261 		return (ENOTSUP);
2262 	}
2263 }
2264 
2265 /* ARGSUSED */
2266 int
2267 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
2268 {
2269 	if (&plat_get_mem_offset) {
2270 		return (plat_get_mem_offset(flt_addr, offp));
2271 	} else {
2272 		return (ENOTSUP);
2273 	}
2274 }
2275 
2276 /*ARGSUSED*/
2277 void
2278 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2279 { ASSERT(0); }
2280 
2281 /*ARGSUSED*/
2282 void
2283 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2284 { ASSERT(0); }
2285 
/*
 * Not expected to be called on OPL; present only to resolve a dependency
 * of the "unix" module.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
	ASSERT(0);
}
2290 
2291 /*ARGSUSED*/
2292 int
2293 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
2294     errorq_elem_t *eqep, size_t afltoffset)
2295 {
2296 	ASSERT(0);
2297 	return (0);
2298 }
2299 
2300 /*ARGSUSED*/
2301 char *
2302 flt_to_error_type(struct async_flt *aflt)
2303 {
2304 	ASSERT(0);
2305 	return (NULL);
2306 }
2307 
#define	PROM_SPARC64VII_MODE_PROPNAME	"SPARC64-VII-mode"

/*
 * Check for the OPL OBP property that indicates SPARC64-VII support.
 * By default, Jupiter features are enabled only if the property is
 * present.  OBP provides it in all-Jupiter domains when the user has
 * selected Jupiter mode for the domain on the system controller.
 * Basically, this OBP property must be present to turn on the
 * cpu_alljupiter flag.
 *
 * Returns 1 when the root node carries the property with a zero-length
 * value, and 0 otherwise (property absent, or present with a non-empty
 * value).
 *
 * Only the property length is queried.  prom_getprop() takes no buffer
 * size, so fetching the value into a fixed-size local could overrun the
 * stack if the property ever carried more than sizeof (int) bytes.
 */
static int
prom_SPARC64VII_support_enabled(void)
{
	return ((prom_getproplen(prom_rootnode(),
	    PROM_SPARC64VII_MODE_PROPNAME) == 0) ? 1 : 0);
}
2327