xref: /titanic_52/usr/src/uts/sun4u/cpu/opl_olympus.c (revision 158643e096f452ce5f7de12c65568293c90ba58c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/cpuvar.h>
45 #include <sys/opl_olympus_regs.h>
46 #include <sys/opl_module.h>
47 #include <sys/async.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/dditypes.h>
51 #include <sys/cpu_module.h>
52 #include <sys/sysmacros.h>
53 #include <sys/intreg.h>
54 #include <sys/clock.h>
55 #include <sys/platform_module.h>
56 #include <sys/ontrap.h>
57 #include <sys/panic.h>
58 #include <sys/memlist.h>
59 #include <sys/ndifm.h>
60 #include <sys/ddifm.h>
61 #include <sys/fm/protocol.h>
62 #include <sys/fm/util.h>
63 #include <sys/fm/cpu/SPARC64-VI.h>
64 #include <sys/dtrace.h>
65 #include <sys/watchpoint.h>
66 #include <sys/promif.h>
67 
68 /*
69  * Internal functions.
70  */
71 static int cpu_sync_log_err(void *flt);
72 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
73 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
74 static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
75 
76 /*
77  * Error counters resetting interval.
78  */
79 static int opl_async_check_interval = 60;		/* 1 min */
80 
/*
 * Global flag, set to 1 here; nothing in the reviewed part of this file
 * reads it.
 * NOTE(review): presumably tells common MMU code that this CPU supports two
 * programmable page sizes -- confirm against its consumers.
 */
81 uint_t cpu_impl_dual_pgsz = 1;
82 
83 /*
84  * PA[22:0] represent Displacement in Jupiter
85  * configuration space.
86  */
87 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
88 
89 /*
90  * set in /etc/system to control logging of user BERR/TO's
91  */
92 int cpu_berr_to_verbose = 0;
93 
/*
 * File-private state that is not referenced in the reviewed part of this
 * file; being static and uninitialised, all of it starts out as zero.
 */
94 static int min_ecache_size;
95 static uint_t priv_hcl_1;
96 static uint_t priv_hcl_2;
97 static uint_t priv_hcl_4;
98 static uint_t priv_hcl_8;
99 
100 /*
101  * Olympus error log
102  */
/*
 * Allocated by cpu_error_init(), which panics unless the buffer's base
 * address is page aligned.
 */
103 static opl_errlog_t	*opl_err_log;
104 
105 /*
106  * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
107  * No any other ecc_type_info insertion is allowed in between the following
108  * four UE classess.
109  */
110 ecc_type_to_info_t ecc_type_to_info[] = {
111 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
112 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
113 	FM_EREPORT_CPU_UE_MEM,
114 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
115 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
116 	FM_EREPORT_CPU_UE_CHANNEL,
117 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
118 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
119 	FM_EREPORT_CPU_UE_CPU,
120 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
121 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
122 	FM_EREPORT_CPU_UE_PATH,
123 	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
124 	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
125 	FM_EREPORT_CPU_BERR,
126 	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
127 	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
128 	FM_EREPORT_CPU_BTO,
129 	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
130 	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
131 	FM_EREPORT_CPU_MTLB,
132 	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
133 	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
134 	FM_EREPORT_CPU_TLBP,
135 
136 	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
137 	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
138 	FM_EREPORT_CPU_CRE,
139 	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
140 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
141 	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
142 	FM_EREPORT_CPU_TSBCTX,
143 	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
144 	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
145 	FM_EREPORT_CPU_TSBP,
146 	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
147 	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
148 	FM_EREPORT_CPU_PSTATE,
149 	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
150 	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
151 	FM_EREPORT_CPU_TSTATE,
152 	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
153 	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
154 	FM_EREPORT_CPU_IUG_F,
155 	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
156 	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
157 	FM_EREPORT_CPU_IUG_R,
158 	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
159 	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
160 	FM_EREPORT_CPU_SDC,
161 	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
162 	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
163 	FM_EREPORT_CPU_WDT,
164 	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
165 	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
166 	FM_EREPORT_CPU_DTLB,
167 	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
168 	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
169 	FM_EREPORT_CPU_ITLB,
170 	UGESR_IUG_COREERR, "IUG_COREERR",
171 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
172 	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
173 	FM_EREPORT_CPU_CORE,
174 	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
175 	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
176 	FM_EREPORT_CPU_DAE,
177 	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
178 	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
179 	FM_EREPORT_CPU_IAE,
180 	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
181 	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
182 	FM_EREPORT_CPU_UGE,
183 	0,		NULL,		0,		0,
184 	NULL,  0,	   0,
185 };
186 
/*
 * Hook for a routine that reports memory layout information for a physical
 * address.  It is not assigned anywhere in the reviewed part of this file,
 * so as an uninitialised file-scope pointer it starts out NULL.
 * NOTE(review): presumably installed by platform or memory-controller code;
 * confirm who sets it and that callers check for NULL.
 */
187 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
188 		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
189 		int *segsp, int *banksp, int *mcidp);
190 
191 
192 /*
193  * Setup trap handlers for 0xA, 0x32, 0x40 trap types.
194  */
195 void
196 cpu_init_trap(void)
197 {
198 	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
199 	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
200 	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
201 	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
202 	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
203 	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
204 }
205 
206 static int
207 getintprop(pnode_t node, char *name, int deflt)
208 {
209 	int	value;
210 
211 	switch (prom_getproplen(node, name)) {
212 	case sizeof (int):
213 		(void) prom_getprop(node, name, (caddr_t)&value);
214 		break;
215 
216 	default:
217 		value = deflt;
218 		break;
219 	}
220 
221 	return (value);
222 }
223 
224 /*
225  * Set the magic constants of the implementation.
226  */
227 /*ARGSUSED*/
228 void
229 cpu_fiximp(pnode_t dnode)
230 {
231 	int i, a;
232 	extern int vac_size, vac_shift;
233 	extern uint_t vac_mask;
234 
235 	static struct {
236 		char	*name;
237 		int	*var;
238 		int	defval;
239 	} prop[] = {
240 		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
241 		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
242 		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
243 		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
244 		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
245 		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
246 		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
247 	};
248 
249 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
250 		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
251 
252 	ecache_setsize = ecache_size / ecache_associativity;
253 
254 	vac_size = OPL_VAC_SIZE;
255 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
256 	i = 0; a = vac_size;
257 	while (a >>= 1)
258 		++i;
259 	vac_shift = i;
260 	shm_alignment = vac_size;
261 	vac = 1;
262 }
263 
264 #ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
/*
 * Quick and dirty way to locally redefine, for OPL, the value of
 * IDSR_BN_SETS to 31 instead of the standard value of 32.  This works
 * around a problem in REV_B of the Olympus_c processor, which cannot
 * handle more than 31 xcall broadcasts.
 */
272 #undef	IDSR_BN_SETS
273 #define	IDSR_BN_SETS    31
274 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
275 
276 void
277 send_mondo_set(cpuset_t set)
278 {
279 	int lo, busy, nack, shipped = 0;
280 	uint16_t i, cpuids[IDSR_BN_SETS];
281 	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
282 	uint64_t starttick, endtick, tick, lasttick;
283 #if (NCPU > IDSR_BN_SETS)
284 	int index = 0;
285 	int ncpuids = 0;
286 #endif
287 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
288 	int bn_sets = IDSR_BN_SETS;
289 	uint64_t ver;
290 
291 	ASSERT(NCPU > bn_sets);
292 #endif
293 
294 	ASSERT(!CPUSET_ISNULL(set));
295 	starttick = lasttick = gettick();
296 
297 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
298 	ver = ultra_getver();
299 	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
300 		((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
301 		bn_sets = 1;
302 #endif
303 
304 #if (NCPU <= IDSR_BN_SETS)
305 	for (i = 0; i < NCPU; i++)
306 		if (CPU_IN_SET(set, i)) {
307 			shipit(i, shipped);
308 			nackmask |= IDSR_NACK_BIT(shipped);
309 			cpuids[shipped++] = i;
310 			CPUSET_DEL(set, i);
311 			if (CPUSET_ISNULL(set))
312 				break;
313 		}
314 	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
315 #else
316 	for (i = 0; i < NCPU; i++)
317 		if (CPU_IN_SET(set, i)) {
318 			ncpuids++;
319 
320 			/*
321 			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
322 			 * find we have shipped to more than (IDSR_BN_SETS)
323 			 * CPUs, set "index" to the highest numbered CPU in
324 			 * the set so we can ship to other CPUs a bit later on.
325 			 */
326 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
327 			if (shipped < bn_sets) {
328 #else
329 			if (shipped < IDSR_BN_SETS) {
330 #endif
331 				shipit(i, shipped);
332 				nackmask |= IDSR_NACK_BIT(shipped);
333 				cpuids[shipped++] = i;
334 				CPUSET_DEL(set, i);
335 				if (CPUSET_ISNULL(set))
336 					break;
337 			} else
338 				index = (int)i;
339 		}
340 
341 	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
342 #endif
343 
344 	busymask = IDSR_NACK_TO_BUSY(nackmask);
345 	busy = nack = 0;
346 	endtick = starttick + xc_tick_limit;
347 	for (;;) {
348 		idsr = getidsr();
349 #if (NCPU <= IDSR_BN_SETS)
350 		if (idsr == 0)
351 			break;
352 #else
353 		if (idsr == 0 && shipped == ncpuids)
354 			break;
355 #endif
356 		tick = gettick();
357 		/*
358 		 * If there is a big jump between the current tick
359 		 * count and lasttick, we have probably hit a break
360 		 * point.  Adjust endtick accordingly to avoid panic.
361 		 */
362 		if (tick > (lasttick + xc_tick_jump_limit))
363 			endtick += (tick - lasttick);
364 		lasttick = tick;
365 		if (tick > endtick) {
366 			if (panic_quiesce)
367 				return;
368 			cmn_err(CE_CONT, "send mondo timeout "
369 				"[%d NACK %d BUSY]\nIDSR 0x%"
370 				"" PRIx64 "  cpuids:", nack, busy, idsr);
371 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
372 			for (i = 0; i < bn_sets; i++) {
373 #else
374 			for (i = 0; i < IDSR_BN_SETS; i++) {
375 #endif
376 				if (idsr & (IDSR_NACK_BIT(i) |
377 				    IDSR_BUSY_BIT(i))) {
378 					cmn_err(CE_CONT, " 0x%x",
379 						cpuids[i]);
380 				}
381 			}
382 			cmn_err(CE_CONT, "\n");
383 			cmn_err(CE_PANIC, "send_mondo_set: timeout");
384 		}
385 		curnack = idsr & nackmask;
386 		curbusy = idsr & busymask;
387 
388 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
389 		/*
390 		 * Only proceed to send more xcalls if all the
391 		 * cpus in the previous IDSR_BN_SETS were completed.
392 		 */
393 		if (curbusy) {
394 			busy++;
395 			continue;
396 		}
397 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
398 
399 #if (NCPU > IDSR_BN_SETS)
400 		if (shipped < ncpuids) {
401 			uint64_t cpus_left;
402 			uint16_t next = (uint16_t)index;
403 
404 			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
405 			    busymask;
406 
407 			if (cpus_left) {
408 				do {
409 					/*
410 					 * Sequence through and ship to the
411 					 * remainder of the CPUs in the system
412 					 * (e.g. other than the first
413 					 * (IDSR_BN_SETS)) in reverse order.
414 					 */
415 					lo = lowbit(cpus_left) - 1;
416 					i = IDSR_BUSY_IDX(lo);
417 					shipit(next, i);
418 					shipped++;
419 					cpuids[i] = next;
420 
421 					/*
422 					 * If we've processed all the CPUs,
423 					 * exit the loop now and save
424 					 * instructions.
425 					 */
426 					if (shipped == ncpuids)
427 						break;
428 
429 					for ((index = ((int)next - 1));
430 						index >= 0; index--)
431 						if (CPU_IN_SET(set, index)) {
432 							next = (uint16_t)index;
433 							break;
434 						}
435 
436 					cpus_left &= ~(1ull << lo);
437 				} while (cpus_left);
438 				continue;
439 			}
440 		}
441 #endif
442 #ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
443 		if (curbusy) {
444 			busy++;
445 			continue;
446 		}
447 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
448 #ifdef SEND_MONDO_STATS
449 		{
450 			int n = gettick() - starttick;
451 			if (n < 8192)
452 				x_nack_stimes[n >> 7]++;
453 		}
454 #endif
455 		while (gettick() < (tick + sys_clock_mhz))
456 			;
457 		do {
458 			lo = lowbit(curnack) - 1;
459 			i = IDSR_NACK_IDX(lo);
460 			shipit(cpuids[i], i);
461 			curnack &= ~(1ull << lo);
462 		} while (curnack);
463 		nack++;
464 		busy = 0;
465 	}
466 #ifdef SEND_MONDO_STATS
467 	{
468 		int n = gettick() - starttick;
469 		if (n < 8192)
470 			x_set_stimes[n >> 7]++;
471 		else
472 			x_set_ltimes[(n >> 13) & 0xf]++;
473 	}
474 	x_set_cpus[shipped]++;
475 #endif
476 }
477 
478 /*
479  * Cpu private initialization.
480  */
481 void
482 cpu_init_private(struct cpu *cp)
483 {
484 	if (!(IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation))) {
485 		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI is supported",
486 			cp->cpu_id, cpunodes[cp->cpu_id].implementation);
487 	}
488 
489 	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
490 }
491 
492 void
493 cpu_setup(void)
494 {
495 	extern int at_flags;
496 	extern int cpc_has_overflow_intr;
497 	uint64_t cpu0_log;
498 	extern	 uint64_t opl_cpu0_err_log;
499 
500 	/*
501 	 * Initialize Error log Scratch register for error handling.
502 	 */
503 
504 	cpu0_log = va_to_pa(&opl_cpu0_err_log);
505 	opl_error_setup(cpu0_log);
506 
507 	/*
508 	 * Enable MMU translating multiple page sizes for
509 	 * sITLB and sDTLB.
510 	 */
511 	opl_mpg_enable();
512 
513 	/*
514 	 * Setup chip-specific trap handlers.
515 	 */
516 	cpu_init_trap();
517 
518 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
519 
520 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
521 
522 	/*
523 	 * Due to the number of entries in the fully-associative tlb
524 	 * this may have to be tuned lower than in spitfire.
525 	 */
526 	pp_slots = MIN(8, MAXPP_SLOTS);
527 
528 	/*
529 	 * Block stores do not invalidate all pages of the d$, pagecopy
530 	 * et. al. need virtual translations with virtual coloring taken
531 	 * into consideration.  prefetch/ldd will pollute the d$ on the
532 	 * load side.
533 	 */
534 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
535 
536 	if (use_page_coloring) {
537 		do_pg_coloring = 1;
538 	}
539 
540 	isa_list =
541 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
542 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
543 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
544 
545 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
546 	    AV_SPARC_POPC | AV_SPARC_FMAF;
547 
548 	/*
549 	 * On SPARC64-VI, there's no hole in the virtual address space
550 	 */
551 	hole_start = hole_end = 0;
552 
553 	/*
554 	 * The kpm mapping window.
555 	 * kpm_size:
556 	 *	The size of a single kpm range.
557 	 *	The overall size will be: kpm_size * vac_colors.
558 	 * kpm_vbase:
559 	 *	The virtual start address of the kpm range within the kernel
560 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
561 	 */
562 	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
563 	kpm_size_shift = 47;
564 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
565 	kpm_smallpages = 1;
566 
567 	/*
568 	 * The traptrace code uses either %tick or %stick for
569 	 * timestamping.  We have %stick so we can use it.
570 	 */
571 	traptrace_use_stick = 1;
572 
573 	/*
574 	 * SPARC64-VI has a performance counter overflow interrupt
575 	 */
576 	cpc_has_overflow_intr = 1;
577 
578 	/*
579 	 * Declare that this architecture/cpu combination does not support
580 	 * fpRAS.
581 	 */
582 	fpras_implemented = 0;
583 }
584 
585 /*
586  * Called by setcpudelay
587  */
588 void
589 cpu_init_tick_freq(void)
590 {
591 	/*
592 	 * For SPARC64-VI we want to use the system clock rate as
593 	 * the basis for low level timing, due to support of mixed
594 	 * speed CPUs and power managment.
595 	 */
596 	if (system_clock_freq == 0)
597 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
598 
599 	sys_tick_freq = system_clock_freq;
600 }
601 
602 #ifdef SEND_MONDO_STATS
/*
 * Mondo dispatch statistics, present only when SEND_MONDO_STATS is defined.
 * The x_one_* arrays are updated by send_one_mondo(); x_set_* and
 * x_nack_stimes are updated by send_mondo_set().
 *
 * *_stimes: elapsed times below 8192 ticks, in buckets of 128 ticks.
 * *_ltimes: longer elapsed times, bucketed by bits 13-16 of the tick count.
 * x_set_cpus: indexed by the number of CPUs shipped to in one call.
 */
603 uint32_t x_one_stimes[64];
604 uint32_t x_one_ltimes[16];
605 uint32_t x_set_stimes[64];
606 uint32_t x_set_ltimes[16];
607 uint32_t x_set_cpus[NCPU];
608 uint32_t x_nack_stimes[64];
609 #endif
610 
611 /*
612  * Note: A version of this function is used by the debugger via the KDI,
613  * and must be kept in sync with this version.  Any changes made to this
614  * function to support new chips or to accomodate errata must also be included
615  * in the KDI-specific version.  See us3_kdi.c.
616  */
617 void
618 send_one_mondo(int cpuid)
619 {
620 	int busy, nack;
621 	uint64_t idsr, starttick, endtick, tick, lasttick;
622 	uint64_t busymask;
623 
624 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
625 	starttick = lasttick = gettick();
626 	shipit(cpuid, 0);
627 	endtick = starttick + xc_tick_limit;
628 	busy = nack = 0;
629 	busymask = IDSR_BUSY;
630 	for (;;) {
631 		idsr = getidsr();
632 		if (idsr == 0)
633 			break;
634 
635 		tick = gettick();
636 		/*
637 		 * If there is a big jump between the current tick
638 		 * count and lasttick, we have probably hit a break
639 		 * point.  Adjust endtick accordingly to avoid panic.
640 		 */
641 		if (tick > (lasttick + xc_tick_jump_limit))
642 			endtick += (tick - lasttick);
643 		lasttick = tick;
644 		if (tick > endtick) {
645 			if (panic_quiesce)
646 				return;
647 			cmn_err(CE_PANIC, "send mondo timeout "
648 				"(target 0x%x) [%d NACK %d BUSY]",
649 					cpuid, nack, busy);
650 		}
651 
652 		if (idsr & busymask) {
653 			busy++;
654 			continue;
655 		}
656 		drv_usecwait(1);
657 		shipit(cpuid, 0);
658 		nack++;
659 		busy = 0;
660 	}
661 #ifdef SEND_MONDO_STATS
662 	{
663 		int n = gettick() - starttick;
664 		if (n < 8192)
665 			x_one_stimes[n >> 7]++;
666 		else
667 			x_one_ltimes[(n >> 13) & 0xf]++;
668 	}
669 #endif
670 }
671 
672 /*
673  * init_mmu_page_sizes is set to one after the bootup time initialization
674  * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
675  * valid value.
676  *
677  * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
678  * versions of disable_ism_large_pages and disable_large_pages, and feed back
679  * into those two hat variables at hat initialization time.
680  *
681  */
/* Set to 1 by the first call to mmu_init_mmu_page_sizes(). */
682 int init_mmu_page_sizes = 0;
683 
/*
 * Bit masks of disabled large page sizes, one bit (1 << TTE<size>) per
 * size.  mmu_large_pages_disabled() returns them for HAT_LOAD,
 * HAT_LOAD_SHARE, HAT_AUTO_DATA and HAT_AUTO_TEXT respectively.  The ism
 * and auto_data masks are rewritten by mmu_init_large_pages().
 */
684 static uint_t mmu_disable_large_pages = 0;
685 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
686 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
687 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
688 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
689 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
690 	(1 << TTE512K));
691 
692 /*
693  * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
694  * Called during very early bootup from check_cpus_set().
695  * Can be called to verify that mmu_page_sizes are set up correctly.
696  *
697  * Set Olympus defaults. We do not use the function parameter.
698  */
699 /*ARGSUSED*/
700 int
701 mmu_init_mmu_page_sizes(int32_t not_used)
702 {
703 	if (!init_mmu_page_sizes) {
704 		mmu_page_sizes = MMU_PAGE_SIZES;
705 		mmu_hashcnt = MAX_HASHCNT;
706 		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
707 		mmu_exported_pagesize_mask = (1 << TTE8K) |
708 		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
709 		    (1 << TTE32M) | (1 << TTE256M);
710 		init_mmu_page_sizes = 1;
711 		return (0);
712 	}
713 	return (1);
714 }
715 
716 /* SPARC64-VI worst case DTLB parameters */
/* LOCKED_DTLB_ENTRIES may be overridden from outside; the default is 5. */
717 #ifndef	LOCKED_DTLB_ENTRIES
718 #define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
719 #endif
720 #define	TOTAL_DTLB_ENTRIES	32
/*
 * AVAIL_32M_ENTRIES and AVAIL_256M_ENTRIES are defined but not used by the
 * table below, which applies AVAIL_DTLB_ENTRIES to every page size.
 */
721 #define	AVAIL_32M_ENTRIES	0
722 #define	AVAIL_256M_ENTRIES	0
723 #define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
/*
 * Per-page-size TTE count thresholds.  mmu_setup_page_sizes() falls back to
 * the base page size for any chosen size whose TTE count is below its
 * threshold.
 */
724 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
725 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
726 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
727 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
728 
729 /*
730  * The function returns the mmu-specific values for the
731  * hat's disable_large_pages, disable_ism_large_pages, and
732  * disable_auto_data_large_pages and
733  * disable_text_data_large_pages variables.
734  */
735 uint_t
736 mmu_large_pages_disabled(uint_t flag)
737 {
738 	uint_t pages_disable = 0;
739 	extern int use_text_pgsz64K;
740 	extern int use_text_pgsz512K;
741 
742 	if (flag == HAT_LOAD) {
743 		pages_disable =  mmu_disable_large_pages;
744 	} else if (flag == HAT_LOAD_SHARE) {
745 		pages_disable = mmu_disable_ism_large_pages;
746 	} else if (flag == HAT_AUTO_DATA) {
747 		pages_disable = mmu_disable_auto_data_large_pages;
748 	} else if (flag == HAT_AUTO_TEXT) {
749 		pages_disable = mmu_disable_auto_text_large_pages;
750 		if (use_text_pgsz512K) {
751 			pages_disable &= ~(1 << TTE512K);
752 		}
753 		if (use_text_pgsz64K) {
754 			pages_disable &= ~(1 << TTE64K);
755 		}
756 	}
757 	return (pages_disable);
758 }
759 
760 /*
761  * mmu_init_large_pages is called with the desired ism_pagesize parameter.
762  * It may be called from set_platform_defaults, if some value other than 32M
763  * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
764  * then only warn, since it would be bad form to panic due to a user typo.
765  *
766  * The function re-initializes the mmu_disable_ism_large_pages variable.
767  */
768 void
769 mmu_init_large_pages(size_t ism_pagesize)
770 {
771 	switch (ism_pagesize) {
772 	case MMU_PAGESIZE4M:
773 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
774 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
775 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
776 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
777 		break;
778 	case MMU_PAGESIZE32M:
779 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
780 		    (1 << TTE512K) | (1 << TTE256M));
781 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
782 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
783 		adjust_data_maxlpsize(ism_pagesize);
784 		break;
785 	case MMU_PAGESIZE256M:
786 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
787 		    (1 << TTE512K) | (1 << TTE32M));
788 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
789 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
790 		adjust_data_maxlpsize(ism_pagesize);
791 		break;
792 	default:
793 		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
794 		    ism_pagesize);
795 		break;
796 	}
797 }
798 
799 /*
800  * Function to reprogram the TLBs when page sizes used
801  * by a process change significantly.
802  */
803 void
804 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
805 {
806 	uint8_t pgsz0, pgsz1;
807 
808 	/*
809 	 * Don't program 2nd dtlb for kernel and ism hat
810 	 */
811 	ASSERT(hat->sfmmu_ismhat == NULL);
812 	ASSERT(hat != ksfmmup);
813 
814 	/*
815 	 * hat->sfmmu_pgsz[] is an array whose elements
816 	 * contain a sorted order of page sizes.  Element
817 	 * 0 is the most commonly used page size, followed
818 	 * by element 1, and so on.
819 	 *
820 	 * ttecnt[] is an array of per-page-size page counts
821 	 * mapped into the process.
822 	 *
823 	 * If the HAT's choice for page sizes is unsuitable,
824 	 * we can override it here.  The new values written
825 	 * to the array will be handed back to us later to
826 	 * do the actual programming of the TLB hardware.
827 	 *
828 	 */
829 	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
830 	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);
831 
832 	/*
833 	 * This implements PAGESIZE programming of the sTLB
834 	 * if large TTE counts don't exceed the thresholds.
835 	 */
836 	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
837 		pgsz0 = page_szc(MMU_PAGESIZE);
838 	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
839 		pgsz1 = page_szc(MMU_PAGESIZE);
840 	tmp_pgsz[0] = pgsz0;
841 	tmp_pgsz[1] = pgsz1;
842 	/* otherwise, accept what the HAT chose for us */
843 }
844 
845 /*
846  * The HAT calls this function when an MMU context is allocated so that we
847  * can reprogram the large TLBs appropriately for the new process using
848  * the context.
849  *
850  * The caller must hold the HAT lock.
851  */
852 void
853 mmu_set_ctx_page_sizes(struct hat *hat)
854 {
855 	uint8_t pgsz0, pgsz1;
856 	uint8_t new_cext;
857 
858 	ASSERT(sfmmu_hat_lock_held(hat));
859 	/*
860 	 * Don't program 2nd dtlb for kernel and ism hat
861 	 */
862 	if (hat->sfmmu_ismhat || hat == ksfmmup)
863 		return;
864 
865 	/*
866 	 * If supported, reprogram the TLBs to a larger pagesize.
867 	 */
868 	pgsz0 = hat->sfmmu_pgsz[0];
869 	pgsz1 = hat->sfmmu_pgsz[1];
870 	ASSERT(pgsz0 < mmu_page_sizes);
871 	ASSERT(pgsz1 < mmu_page_sizes);
872 	new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
873 	if (hat->sfmmu_cext != new_cext) {
874 #ifdef DEBUG
875 		int i;
876 		/*
877 		 * assert cnum should be invalid, this is because pagesize
878 		 * can only be changed after a proc's ctxs are invalidated.
879 		 */
880 		for (i = 0; i < max_mmu_ctxdoms; i++) {
881 			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
882 		}
883 #endif /* DEBUG */
884 		hat->sfmmu_cext = new_cext;
885 	}
886 	/*
887 	 * sfmmu_setctx_sec() will take care of the
888 	 * rest of the dirty work for us.
889 	 */
890 }
891 
892 /*
893  * This function assumes that there are either four or six supported page
894  * sizes and at most two programmable TLBs, so we need to decide which
895  * page sizes are most important and then adjust the TLB page sizes
896  * accordingly (if supported).
897  *
898  * If these assumptions change, this function will need to be
899  * updated to support whatever the new limits are.
900  */
901 void
902 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
903 {
904 	uint64_t sortcnt[MMU_PAGE_SIZES];
905 	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
906 	uint8_t i, j, max;
907 	uint16_t oldval, newval;
908 
909 	/*
910 	 * We only consider reprogramming the TLBs if one or more of
911 	 * the two most used page sizes changes and we're using
912 	 * large pages in this process.
913 	 */
914 	if (SFMMU_LGPGS_INUSE(sfmmup)) {
915 		/* Sort page sizes. */
916 		for (i = 0; i < mmu_page_sizes; i++) {
917 			sortcnt[i] = ttecnt[i];
918 		}
919 		for (j = 0; j < mmu_page_sizes; j++) {
920 			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
921 				if (sortcnt[i] > sortcnt[max])
922 					max = i;
923 			}
924 			tmp_pgsz[j] = max;
925 			sortcnt[max] = 0;
926 		}
927 
928 		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];
929 
930 		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);
931 
932 		/* Check 2 largest values after the sort. */
933 		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
934 		if (newval != oldval) {
935 			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
936 		}
937 	}
938 }
939 
940 /*
941  * Return processor specific async error structure
942  * size used.
943  */
944 int
945 cpu_aflt_size(void)
946 {
947 	return (sizeof (opl_async_flt_t));
948 }
949 
950 /*
951  * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
952  * post-process CPU events that are dequeued.  As such, it can be invoked
953  * from softint context, from AST processing in the trap() flow, or from the
954  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
955  * Historically this entry point was used to log the actual cmn_err(9F) text;
956  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
957  * With FMA this function now also returns a flag which indicates to the
958  * caller whether the ereport should be posted (1) or suppressed (0).
959  */
960 /*ARGSUSED*/
961 static int
962 cpu_sync_log_err(void *flt)
963 {
964 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
965 	struct async_flt *aflt = (struct async_flt *)flt;
966 
967 	/*
968 	 * No extra processing of urgent error events.
969 	 * Always generate ereports for these events.
970 	 */
971 	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
972 		return (1);
973 
974 	/*
975 	 * Additional processing for synchronous errors.
976 	 */
977 	switch (opl_flt->flt_type) {
978 	case OPL_CPU_INV_SFSR:
979 		return (1);
980 
981 	case OPL_CPU_SYNC_UE:
982 		/*
983 		 * The validity: SFSR_MK_UE bit has been checked
984 		 * in opl_cpu_sync_error()
985 		 * No more check is required.
986 		 *
987 		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
988 		 * and they have been retrieved in cpu_queue_events()
989 		 */
990 
991 		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
992 			ASSERT(aflt->flt_in_memory);
993 			/*
994 			 * We want to skip logging only if ALL the following
995 			 * conditions are true:
996 			 *
997 			 *	1. We are not panicing already.
998 			 *	2. The error is a memory error.
999 			 *	3. There is only one error.
1000 			 *	4. The error is on a retired page.
1001 			 *	5. The error occurred under on_trap
1002 			 *	protection AFLT_PROT_EC
1003 			 */
1004 			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
1005 			    page_retire_check(aflt->flt_addr, NULL) == 0) {
1006 				/*
1007 				 * Do not log an error from
1008 				 * the retired page
1009 				 */
1010 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
1011 				return (0);
1012 			}
1013 			if (!panicstr)
1014 				cpu_page_retire(opl_flt);
1015 		}
1016 		return (1);
1017 
1018 	case OPL_CPU_SYNC_OTHERS:
1019 		/*
1020 		 * For the following error cases, the processor HW does
1021 		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
1022 		 * to assign appropriate values here to reflect what we
1023 		 * think is the most likely cause of the problem w.r.t to
1024 		 * the particular error event.  For Buserr and timeout
1025 		 * error event, we will assign OPL_ERRID_CHANNEL as the
1026 		 * most likely reason.  For TLB parity or multiple hit
1027 		 * error events, we will assign the reason as
1028 		 * OPL_ERRID_CPU (cpu related problem) and set the
1029 		 * flt_eid_sid to point to the cpuid.
1030 		 */
1031 
1032 		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
1033 			/*
1034 			 * flt_eid_sid will not be used for this case.
1035 			 */
1036 			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
1037 		}
1038 		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
1039 			    opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1040 			    opl_flt->flt_eid_sid = aflt->flt_inst;
1041 		}
1042 
1043 		/*
1044 		 * In case of no effective error bit
1045 		 */
1046 		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
1047 			    opl_flt->flt_eid_mod = OPL_ERRID_CPU;
1048 			    opl_flt->flt_eid_sid = aflt->flt_inst;
1049 		}
1050 		break;
1051 
1052 		default:
1053 			return (1);
1054 	}
1055 	return (1);
1056 }
1057 
1058 /*
1059  * Retire the bad page that may contain the flushed error.
1060  */
1061 void
1062 cpu_page_retire(opl_async_flt_t *opl_flt)
1063 {
1064 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1065 	(void) page_retire(aflt->flt_addr, PR_UE);
1066 }
1067 
1068 /*
1069  * Invoked by error_init() early in startup and therefore before
1070  * startup_errorq() is called to drain any error Q -
1071  *
1072  * startup()
1073  *   startup_end()
1074  *     error_init()
1075  *       cpu_error_init()
1076  * errorq_init()
1077  *   errorq_drain()
1078  * start_other_cpus()
1079  *
1080  * The purpose of this routine is to create error-related taskqs.  Taskqs
1081  * are used for this purpose because cpu_lock can't be grabbed from interrupt
1082  * context.
1083  *
1084  */
1085 /*ARGSUSED*/
1086 void
1087 cpu_error_init(int items)
1088 {
1089 	opl_err_log = (opl_errlog_t *)
1090 	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
1091 	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
1092 		cmn_err(CE_PANIC, "The base address of the error log "
1093 		    "is not page aligned");
1094 }
1095 
1096 /*
1097  * We route all errors through a single switch statement.
1098  */
1099 void
1100 cpu_ue_log_err(struct async_flt *aflt)
1101 {
1102 	switch (aflt->flt_class) {
1103 	case CPU_FAULT:
1104 		if (cpu_sync_log_err(aflt))
1105 			cpu_ereport_post(aflt);
1106 		break;
1107 
1108 	case BUS_FAULT:
1109 		bus_async_log_err(aflt);
1110 		break;
1111 
1112 	default:
1113 		cmn_err(CE_WARN, "discarding async error %p with invalid "
1114 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1115 		return;
1116 	}
1117 }
1118 
/*
 * Panic hook called back from panic_idle().
 *
 * This CPU module has no work to do at that point, so the body is
 * intentionally empty.
 */
void
cpu_async_panic_callb(void)
{
}
1128 
1129 /*
1130  * Routine to return a string identifying the physical name
1131  * associated with a memory/cache error.
1132  */
1133 /*ARGSUSED*/
1134 int
1135 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
1136     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
1137     ushort_t flt_status, char *buf, int buflen, int *lenp)
1138 {
1139 	int synd_code;
1140 	int ret;
1141 
1142 	/*
1143 	 * An AFSR of -1 defaults to a memory syndrome.
1144 	 */
1145 	synd_code = (int)flt_synd;
1146 
1147 	if (&plat_get_mem_unum) {
1148 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
1149 			flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
1150 			buf[0] = '\0';
1151 			*lenp = 0;
1152 		}
1153 		return (ret);
1154 	}
1155 	buf[0] = '\0';
1156 	*lenp = 0;
1157 	return (ENOTSUP);
1158 }
1159 
1160 /*
1161  * Wrapper for cpu_get_mem_unum() routine that takes an
1162  * async_flt struct rather than explicit arguments.
1163  */
1164 int
1165 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1166     char *buf, int buflen, int *lenp)
1167 {
1168 	/*
1169 	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
1170 	 * memory error.
1171 	 */
1172 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
1173 	    (uint64_t)-1,
1174 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
1175 	    aflt->flt_status, buf, buflen, lenp));
1176 }
1177 
1178 /*
1179  * This routine is a more generic interface to cpu_get_mem_unum()
1180  * that may be used by other modules (e.g. mm).
1181  */
1182 /*ARGSUSED*/
1183 int
1184 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1185     char *buf, int buflen, int *lenp)
1186 {
1187 	int synd_status, flt_in_memory, ret;
1188 	ushort_t flt_status = 0;
1189 	char unum[UNUM_NAMLEN];
1190 
1191 	/*
1192 	 * Check for an invalid address.
1193 	 */
1194 	if (afar == (uint64_t)-1)
1195 		return (ENXIO);
1196 
1197 	if (synd == (uint64_t)-1)
1198 		synd_status = AFLT_STAT_INVALID;
1199 	else
1200 		synd_status = AFLT_STAT_VALID;
1201 
1202 	flt_in_memory = (*afsr & SFSR_MEMORY) &&
1203 		pf_is_memory(afar >> MMU_PAGESHIFT);
1204 
1205 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1206 		CPU->cpu_id, flt_in_memory, flt_status, unum,
1207 		UNUM_NAMLEN, lenp);
1208 	if (ret != 0)
1209 		return (ret);
1210 
1211 	if (*lenp >= buflen)
1212 		return (ENAMETOOLONG);
1213 
1214 	(void) strncpy(buf, unum, buflen);
1215 
1216 	return (0);
1217 }
1218 
1219 /*
1220  * Routine to return memory information associated
1221  * with a physical address and syndrome.
1222  */
1223 /*ARGSUSED*/
1224 int
1225 cpu_get_mem_info(uint64_t synd, uint64_t afar,
1226     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1227     int *segsp, int *banksp, int *mcidp)
1228 {
1229 	int synd_code = (int)synd;
1230 
1231 	if (afar == (uint64_t)-1)
1232 		return (ENXIO);
1233 
1234 	if (p2get_mem_info != NULL)
1235 		return ((p2get_mem_info)(synd_code, afar,
1236 			mem_sizep, seg_sizep, bank_sizep,
1237 			segsp, banksp, mcidp));
1238 	else
1239 		return (ENOTSUP);
1240 }
1241 
1242 /*
1243  * Routine to return a string identifying the physical
1244  * name associated with a cpuid.
1245  */
1246 int
1247 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1248 {
1249 	int ret;
1250 	char unum[UNUM_NAMLEN];
1251 
1252 	if (&plat_get_cpu_unum) {
1253 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
1254 			!= 0)
1255 			return (ret);
1256 	} else {
1257 		return (ENOTSUP);
1258 	}
1259 
1260 	if (*lenp >= buflen)
1261 		return (ENAMETOOLONG);
1262 
1263 	(void) strncpy(buf, unum, *lenp);
1264 
1265 	return (0);
1266 }
1267 
1268 /*
1269  * This routine exports the name buffer size.
1270  */
1271 size_t
1272 cpu_get_name_bufsize()
1273 {
1274 	return (UNUM_NAMLEN);
1275 }
1276 
1277 /*
1278  * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
1279  */
1280 void
1281 cpu_flush_ecache(void)
1282 {
1283 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
1284 	    cpunodes[CPU->cpu_id].ecache_linesize);
1285 }
1286 
1287 static uint8_t
1288 flt_to_trap_type(struct async_flt *aflt)
1289 {
1290 	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
1291 		return (TRAP_TYPE_ECC_I);
1292 	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
1293 		return (TRAP_TYPE_ECC_D);
1294 	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
1295 		return (TRAP_TYPE_URGENT);
1296 	return (TRAP_TYPE_UNKNOWN);
1297 }
1298 
1299 /*
1300  * Encode the data saved in the opl_async_flt_t struct into
1301  * the FM ereport payload.
1302  */
1303 /* ARGSUSED */
1304 static void
1305 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
1306 		nvlist_t *resource)
1307 {
1308 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
1309 	char unum[UNUM_NAMLEN];
1310 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1311 	int len;
1312 
1313 
1314 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
1315 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
1316 			DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1317 	}
1318 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
1319 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
1320 			DATA_TYPE_UINT64, aflt->flt_addr, NULL);
1321 	}
1322 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
1323 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
1324 			DATA_TYPE_UINT64, aflt->flt_stat, NULL);
1325 	}
1326 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
1327 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
1328 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
1329 	}
1330 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
1331 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
1332 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
1333 	}
1334 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
1335 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
1336 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
1337 	}
1338 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
1339 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
1340 		    DATA_TYPE_BOOLEAN_VALUE,
1341 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
1342 	}
1343 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
1344 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
1345 			DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
1346 	}
1347 
1348 	switch (opl_flt->flt_eid_mod) {
1349 	case OPL_ERRID_CPU:
1350 		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1351 			(u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
1352 		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
1353 			NULL, opl_flt->flt_eid_sid,
1354 			(uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version,
1355 			sbuf);
1356 		fm_payload_set(payload,
1357 			FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1358 			DATA_TYPE_NVLIST, resource, NULL);
1359 		break;
1360 
1361 	case OPL_ERRID_CHANNEL:
1362 		/*
1363 		 * No resource is created but the cpumem DE will find
1364 		 * the defective path by retreiving EID from SFSR which is
1365 		 * included in the payload.
1366 		 */
1367 		break;
1368 
1369 	case OPL_ERRID_MEM:
1370 		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
1371 		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
1372 			NULL, unum, NULL, (uint64_t)-1);
1373 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
1374 			DATA_TYPE_NVLIST, resource, NULL);
1375 		break;
1376 
1377 	case OPL_ERRID_PATH:
1378 		/*
1379 		 * No resource is created but the cpumem DE will find
1380 		 * the defective path by retreiving EID from SFSR which is
1381 		 * included in the payload.
1382 		 */
1383 		break;
1384 	}
1385 }
1386 
1387 /*
1388  * Returns whether fault address is valid for this error bit and
1389  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
1390  */
1391 /*ARGSUSED*/
1392 static int
1393 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
1394 {
1395 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1396 
1397 	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
1398 		return ((t_afsr_bit & SFSR_MEMORY) &&
1399 		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
1400 	}
1401 	return (0);
1402 }
1403 
/*
 * Slave side of stick synchronization.  On OPL the SCF synchronizes the
 * stick, so there is nothing to do here.
 */
void
sticksync_slave(void)
{
}
1411 
/*
 * Master side of stick synchronization.  On OPL the SCF synchronizes
 * the stick, so there is nothing to do here.
 */
void
sticksync_master(void)
{
}
1419 
1420 /*
1421  * Cpu private unitialization.  OPL cpus do not use the private area.
1422  */
1423 void
1424 cpu_uninit_private(struct cpu *cp)
1425 {
1426 	cmp_delete_cpu(cp->cpu_id);
1427 }
1428 
/*
 * Error-path cache flush.  No partial flush is attempted: the entire
 * ecache is always flushed through cpu_flush_ecache().
 */
void
cpu_error_ecache_flush(void)
{
	cpu_flush_ecache();
}
1437 
1438 void
1439 cpu_ereport_post(struct async_flt *aflt)
1440 {
1441 	char *cpu_type, buf[FM_MAX_CLASS];
1442 	nv_alloc_t *nva = NULL;
1443 	nvlist_t *ereport, *detector, *resource;
1444 	errorq_elem_t *eqep;
1445 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
1446 
1447 	if (aflt->flt_panic || panicstr) {
1448 		eqep = errorq_reserve(ereport_errorq);
1449 		if (eqep == NULL)
1450 			return;
1451 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
1452 		nva = errorq_elem_nva(ereport_errorq, eqep);
1453 	} else {
1454 		ereport = fm_nvlist_create(nva);
1455 	}
1456 
1457 	/*
1458 	 * Create the scheme "cpu" FMRI.
1459 	 */
1460 	detector = fm_nvlist_create(nva);
1461 	resource = fm_nvlist_create(nva);
1462 	switch (cpunodes[aflt->flt_inst].implementation) {
1463 	case OLYMPUS_C_IMPL:
1464 		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
1465 		break;
1466 	default:
1467 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
1468 		break;
1469 	}
1470 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
1471 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
1472 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
1473 	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
1474 	    sbuf);
1475 
1476 	/*
1477 	 * Encode all the common data into the ereport.
1478 	 */
1479 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
1480 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
1481 
1482 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
1483 	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
1484 
1485 	/*
1486 	 * Encode the error specific data that was saved in
1487 	 * the async_flt structure into the ereport.
1488 	 */
1489 	cpu_payload_add_aflt(aflt, ereport, resource);
1490 
1491 	if (aflt->flt_panic || panicstr) {
1492 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
1493 	} else {
1494 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
1495 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
1496 		fm_nvlist_destroy(detector, FM_NVA_FREE);
1497 		fm_nvlist_destroy(resource, FM_NVA_FREE);
1498 	}
1499 }
1500 
1501 void
1502 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
1503 {
1504 	int status;
1505 	ddi_fm_error_t de;
1506 
1507 	bzero(&de, sizeof (ddi_fm_error_t));
1508 
1509 	de.fme_version = DDI_FME_VERSION;
1510 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
1511 	de.fme_flag = expected;
1512 	de.fme_bus_specific = (void *)aflt->flt_addr;
1513 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
1514 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
1515 		aflt->flt_panic = 1;
1516 }
1517 
1518 void
1519 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
1520     errorq_t *eqp, uint_t flag)
1521 {
1522 	struct async_flt *aflt = (struct async_flt *)payload;
1523 
1524 	aflt->flt_erpt_class = error_class;
1525 	errorq_dispatch(eqp, payload, payload_sz, flag);
1526 }
1527 
1528 void
1529 adjust_hw_copy_limits(int ecache_size)
1530 {
1531 	/*
1532 	 * Set hw copy limits.
1533 	 *
1534 	 * /etc/system will be parsed later and can override one or more
1535 	 * of these settings.
1536 	 *
1537 	 * At this time, ecache size seems only mildly relevant.
1538 	 * We seem to run into issues with the d-cache and stalls
1539 	 * we see on misses.
1540 	 *
1541 	 * Cycle measurement indicates that 2 byte aligned copies fare
1542 	 * little better than doing things with VIS at around 512 bytes.
1543 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
1544 	 * aligned is faster whenever the source and destination data
1545 	 * in cache and the total size is less than 2 Kbytes.  The 2K
1546 	 * limit seems to be driven by the 2K write cache.
1547 	 * When more than 2K of copies are done in non-VIS mode, stores
1548 	 * backup in the write cache.  In VIS mode, the write cache is
1549 	 * bypassed, allowing faster cache-line writes aligned on cache
1550 	 * boundaries.
1551 	 *
1552 	 * In addition, in non-VIS mode, there is no prefetching, so
1553 	 * for larger copies, the advantage of prefetching to avoid even
1554 	 * occasional cache misses is enough to justify using the VIS code.
1555 	 *
1556 	 * During testing, it was discovered that netbench ran 3% slower
1557 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
1558 	 * applications, data is only used once (copied to the output
1559 	 * buffer, then copied by the network device off the system).  Using
1560 	 * the VIS copy saves more L2 cache state.  Network copies are
1561 	 * around 1.3K to 1.5K in size for historical reasons.
1562 	 *
1563 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
1564 	 * aligned copy even for large caches and 8 MB ecache.  The
1565 	 * infrastructure to allow different limits for different sized
1566 	 * caches is kept to allow further tuning in later releases.
1567 	 */
1568 
1569 	if (min_ecache_size == 0 && use_hw_bcopy) {
1570 		/*
1571 		 * First time through - should be before /etc/system
1572 		 * is read.
1573 		 * Could skip the checks for zero but this lets us
1574 		 * preserve any debugger rewrites.
1575 		 */
1576 		if (hw_copy_limit_1 == 0) {
1577 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
1578 			priv_hcl_1 = hw_copy_limit_1;
1579 		}
1580 		if (hw_copy_limit_2 == 0) {
1581 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
1582 			priv_hcl_2 = hw_copy_limit_2;
1583 		}
1584 		if (hw_copy_limit_4 == 0) {
1585 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
1586 			priv_hcl_4 = hw_copy_limit_4;
1587 		}
1588 		if (hw_copy_limit_8 == 0) {
1589 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
1590 			priv_hcl_8 = hw_copy_limit_8;
1591 		}
1592 		min_ecache_size = ecache_size;
1593 	} else {
1594 		/*
1595 		 * MP initialization. Called *after* /etc/system has
1596 		 * been parsed. One CPU has already been initialized.
1597 		 * Need to cater for /etc/system having scragged one
1598 		 * of our values.
1599 		 */
1600 		if (ecache_size == min_ecache_size) {
1601 			/*
1602 			 * Same size ecache. We do nothing unless we
1603 			 * have a pessimistic ecache setting. In that
1604 			 * case we become more optimistic (if the cache is
1605 			 * large enough).
1606 			 */
1607 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
1608 				/*
1609 				 * Need to adjust hw_copy_limit* from our
1610 				 * pessimistic uniprocessor value to a more
1611 				 * optimistic UP value *iff* it hasn't been
1612 				 * reset.
1613 				 */
1614 				if ((ecache_size > 1048576) &&
1615 				    (priv_hcl_8 == hw_copy_limit_8)) {
1616 					if (ecache_size <= 2097152)
1617 						hw_copy_limit_8 = 4 *
1618 						    VIS_COPY_THRESHOLD;
1619 					else if (ecache_size <= 4194304)
1620 						hw_copy_limit_8 = 4 *
1621 						    VIS_COPY_THRESHOLD;
1622 					else
1623 						hw_copy_limit_8 = 4 *
1624 						    VIS_COPY_THRESHOLD;
1625 					priv_hcl_8 = hw_copy_limit_8;
1626 				}
1627 			}
1628 		} else if (ecache_size < min_ecache_size) {
1629 			/*
1630 			 * A different ecache size. Can this even happen?
1631 			 */
1632 			if (priv_hcl_8 == hw_copy_limit_8) {
1633 				/*
1634 				 * The previous value that we set
1635 				 * is unchanged (i.e., it hasn't been
1636 				 * scragged by /etc/system). Rewrite it.
1637 				 */
1638 				if (ecache_size <= 1048576)
1639 					hw_copy_limit_8 = 8 *
1640 					    VIS_COPY_THRESHOLD;
1641 				else if (ecache_size <= 2097152)
1642 					hw_copy_limit_8 = 8 *
1643 					    VIS_COPY_THRESHOLD;
1644 				else if (ecache_size <= 4194304)
1645 					hw_copy_limit_8 = 8 *
1646 					    VIS_COPY_THRESHOLD;
1647 				else
1648 					hw_copy_limit_8 = 10 *
1649 					    VIS_COPY_THRESHOLD;
1650 				priv_hcl_8 = hw_copy_limit_8;
1651 				min_ecache_size = ecache_size;
1652 			}
1653 		}
1654 	}
1655 }
1656 
1657 #define	VIS_BLOCKSIZE		64
1658 
1659 int
1660 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
1661 {
1662 	int ret, watched;
1663 
1664 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1665 	ret = dtrace_blksuword32(addr, data, 0);
1666 	if (watched)
1667 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
1668 
1669 	return (ret);
1670 }
1671 
1672 void
1673 opl_cpu_reg_init()
1674 {
1675 	uint64_t	this_cpu_log;
1676 
1677 	/*
1678 	 * We do not need to re-initialize cpu0 registers.
1679 	 */
1680 	if (cpu[getprocessorid()] == &cpu0)
1681 		return;
1682 
1683 	/*
1684 	 * Initialize Error log Scratch register for error handling.
1685 	 */
1686 
1687 	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
1688 		ERRLOG_BUFSZ * (getprocessorid())));
1689 	opl_error_setup(this_cpu_log);
1690 
1691 	/*
1692 	 * Enable MMU translating multiple page sizes for
1693 	 * sITLB and sDTLB.
1694 	 */
1695 	opl_mpg_enable();
1696 }
1697 
1698 /*
1699  * Queue one event in ue_queue based on ecc_type_to_info entry.
1700  */
1701 static void
1702 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
1703     ecc_type_to_info_t *eccp)
1704 {
1705 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1706 
1707 	if (reason &&
1708 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
1709 		(void) strcat(reason, eccp->ec_reason);
1710 	}
1711 
1712 	opl_flt->flt_bit = eccp->ec_afsr_bit;
1713 	opl_flt->flt_type = eccp->ec_flt_type;
1714 	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
1715 	aflt->flt_payload = eccp->ec_err_payload;
1716 
1717 	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
1718 	cpu_errorq_dispatch(eccp->ec_err_class,
1719 		(void *)opl_flt, sizeof (opl_async_flt_t),
1720 		ue_queue,
1721 		aflt->flt_panic);
1722 }
1723 
1724 /*
1725  * Queue events on async event queue one event per error bit.
1726  * Return number of events queued.
1727  */
1728 int
1729 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
1730 {
1731 	struct async_flt *aflt = (struct async_flt *)opl_flt;
1732 	ecc_type_to_info_t *eccp;
1733 	int nevents = 0;
1734 
1735 	/*
1736 	 * Queue expected errors, error bit and fault type must must match
1737 	 * in the ecc_type_to_info table.
1738 	 */
1739 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
1740 		eccp++) {
1741 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
1742 		    (eccp->ec_flags & aflt->flt_status) != 0) {
1743 			/*
1744 			 * UE error event can be further
1745 			 * classified/breakdown into finer granularity
1746 			 * based on the flt_eid_mod value set by HW.  We do
1747 			 * special handling here so that we can report UE
1748 			 * error in finer granularity as ue_mem,
1749 			 * ue_channel, ue_cpu or ue_path.
1750 			 */
1751 			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
1752 				opl_flt->flt_eid_mod =
1753 					(aflt->flt_stat & SFSR_EID_MOD)
1754 					>> SFSR_EID_MOD_SHIFT;
1755 				opl_flt->flt_eid_sid =
1756 					(aflt->flt_stat & SFSR_EID_SID)
1757 					>> SFSR_EID_SID_SHIFT;
1758 				/*
1759 				 * Need to advance eccp pointer by flt_eid_mod
1760 				 * so that we get an appropriate ecc pointer
1761 				 *
1762 				 * EID			# of advances
1763 				 * ----------------------------------
1764 				 * OPL_ERRID_MEM	0
1765 				 * OPL_ERRID_CHANNEL	1
1766 				 * OPL_ERRID_CPU	2
1767 				 * OPL_ERRID_PATH	3
1768 				 */
1769 				eccp += opl_flt->flt_eid_mod;
1770 			}
1771 			cpu_queue_one_event(opl_flt, reason, eccp);
1772 			t_afsr_errs &= ~eccp->ec_afsr_bit;
1773 			nevents++;
1774 		}
1775 	}
1776 
1777 	return (nevents);
1778 }
1779 
1780 /*
1781  * Sync. error wrapper functions.
1782  * We use these functions in order to transfer here from the
1783  * nucleus trap handler information about trap type (data or
1784  * instruction) and trap level (0 or above 0). This way we
1785  * get rid of using SFSR's reserved bits.
1786  */
1787 
1788 #define	OPL_SYNC_TL0	0
1789 #define	OPL_SYNC_TL1	1
1790 #define	OPL_ISYNC_ERR	0
1791 #define	OPL_DSYNC_ERR	1
1792 
1793 void
1794 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1795 {
1796 	uint64_t t_sfar = p_sfar;
1797 	uint64_t t_sfsr = p_sfsr;
1798 
1799 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1800 	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
1801 }
1802 
1803 void
1804 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1805 {
1806 	uint64_t t_sfar = p_sfar;
1807 	uint64_t t_sfsr = p_sfsr;
1808 
1809 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1810 	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
1811 }
1812 
1813 void
1814 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1815 {
1816 	uint64_t t_sfar = p_sfar;
1817 	uint64_t t_sfsr = p_sfsr;
1818 
1819 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1820 	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
1821 }
1822 
1823 void
1824 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
1825 {
1826 	uint64_t t_sfar = p_sfar;
1827 	uint64_t t_sfsr = p_sfsr;
1828 
1829 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
1830 	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
1831 }
1832 
1833 /*
1834  * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
1835  * and TLB_PRT.
1836  * This function is designed based on cpu_deferred_error().
1837  */
1838 
1839 static void
1840 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
1841     uint_t tl, uint_t derr)
1842 {
1843 	opl_async_flt_t opl_flt;
1844 	struct async_flt *aflt;
1845 	int trampolined = 0;
1846 	char pr_reason[MAX_REASON_STRING];
1847 	uint64_t log_sfsr;
1848 	int expected = DDI_FM_ERR_UNEXPECTED;
1849 	ddi_acc_hdl_t *hp;
1850 
1851 	/*
1852 	 * We need to look at p_flag to determine if the thread detected an
1853 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1854 	 * because we just need a consistent snapshot and we know that everyone
1855 	 * else will store a consistent set of bits while holding p_lock.  We
1856 	 * don't have to worry about a race because SDOCORE is set once prior
1857 	 * to doing i/o from the process's address space and is never cleared.
1858 	 */
1859 	uint_t pflag = ttoproc(curthread)->p_flag;
1860 
1861 	pr_reason[0] = '\0';
1862 
1863 	/*
1864 	 * handle the specific error
1865 	 */
1866 	bzero(&opl_flt, sizeof (opl_async_flt_t));
1867 	aflt = (struct async_flt *)&opl_flt;
1868 	aflt->flt_id = gethrtime_waitfree();
1869 	aflt->flt_bus_id = getprocessorid();
1870 	aflt->flt_inst = CPU->cpu_id;
1871 	aflt->flt_stat = t_sfsr;
1872 	aflt->flt_addr = t_sfar;
1873 	aflt->flt_pc = (caddr_t)rp->r_pc;
1874 	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
1875 	aflt->flt_class = (uchar_t)CPU_FAULT;
1876 	aflt->flt_priv = (uchar_t)
1877 		(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?  1 : 0));
1878 	aflt->flt_tl = (uchar_t)tl;
1879 	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
1880 	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
1881 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1882 	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;
1883 
1884 	/*
1885 	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
1886 	 * So, clear all error bits to avoid mis-handling and force the system
1887 	 * panicked.
1888 	 * We skip all the procedures below down to the panic message call.
1889 	 */
1890 	if (!(t_sfsr & SFSR_FV)) {
1891 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
1892 		aflt->flt_panic = 1;
1893 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
1894 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
1895 			(void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
1896 			aflt->flt_panic);
1897 		fm_panic("%sErrors(s)", "invalid SFSR");
1898 	}
1899 
1900 	/*
1901 	 * If either UE and MK bit is off, this is not valid UE error.
1902 	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
1903 	 * mis-handling below.
1904 	 * aflt->flt_stat keeps the original bits as a reference.
1905 	 */
1906 	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
1907 	    (SFSR_MK_UE|SFSR_UE)) {
1908 		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
1909 	}
1910 
1911 	/*
1912 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1913 	 * see if we were executing in the kernel under on_trap() or t_lofault
1914 	 * protection.  If so, modify the saved registers so that we return
1915 	 * from the trap to the appropriate trampoline routine.
1916 	 */
1917 	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
1918 		if (curthread->t_ontrap != NULL) {
1919 			on_trap_data_t *otp = curthread->t_ontrap;
1920 
1921 			if (otp->ot_prot & OT_DATA_EC) {
1922 				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
1923 				otp->ot_trap |= (ushort_t)OT_DATA_EC;
1924 				rp->r_pc = otp->ot_trampoline;
1925 				rp->r_npc = rp->r_pc + 4;
1926 				trampolined = 1;
1927 			}
1928 
1929 			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
1930 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1931 				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
1932 				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
1933 				rp->r_pc = otp->ot_trampoline;
1934 				rp->r_npc = rp->r_pc + 4;
1935 				trampolined = 1;
1936 				/*
1937 				 * for peeks and caut_gets errors are expected
1938 				 */
1939 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1940 				if (!hp)
1941 					expected = DDI_FM_ERR_PEEK;
1942 				else if (hp->ah_acc.devacc_attr_access ==
1943 				    DDI_CAUTIOUS_ACC)
1944 					expected = DDI_FM_ERR_EXPECTED;
1945 			}
1946 
1947 		} else if (curthread->t_lofault) {
1948 			aflt->flt_prot = AFLT_PROT_COPY;
1949 			rp->r_g1 = EFAULT;
1950 			rp->r_pc = curthread->t_lofault;
1951 			rp->r_npc = rp->r_pc + 4;
1952 			trampolined = 1;
1953 		}
1954 	}
1955 
1956 	/*
1957 	 * If we're in user mode or we're doing a protected copy, we either
1958 	 * want the ASTON code below to send a signal to the user process
1959 	 * or we want to panic if aft_panic is set.
1960 	 *
1961 	 * If we're in privileged mode and we're not doing a copy, then we
1962 	 * need to check if we've trampolined.  If we haven't trampolined,
1963 	 * we should panic.
1964 	 */
1965 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1966 		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
1967 			aflt->flt_panic |= aft_panic;
1968 	} else if (!trampolined) {
1969 		aflt->flt_panic = 1;
1970 	}
1971 
1972 	/*
1973 	 * If we've trampolined due to a privileged TO or BERR, or if an
1974 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1975 	 * event for that TO or BERR.  Queue all other events (if any) besides
1976 	 * the TO/BERR.
1977 	 */
1978 	log_sfsr = t_sfsr;
1979 	if (trampolined) {
1980 		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
1981 	} else if (!aflt->flt_priv) {
1982 		/*
1983 		 * User mode, suppress messages if
1984 		 * cpu_berr_to_verbose is not set.
1985 		 */
1986 		if (!cpu_berr_to_verbose)
1987 			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
1988 	}
1989 
1990 	if (((log_sfsr & SFSR_ERRS) &&
1991 		(cpu_queue_events(&opl_flt, pr_reason, t_sfsr) == 0)) ||
1992 	    ((t_sfsr & SFSR_ERRS) == 0)) {
1993 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
1994 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
1995 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR,
1996 			(void *)&opl_flt, sizeof (opl_async_flt_t), ue_queue,
1997 			aflt->flt_panic);
1998 	}
1999 
2000 	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
2001 		cpu_run_bus_error_handlers(aflt, expected);
2002 	}
2003 
2004 	/*
2005 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2006 	 * be logged as part of the panic flow.
2007 	 */
2008 	if (aflt->flt_panic) {
2009 		if (pr_reason[0] == 0)
2010 			strcpy(pr_reason, "invalid SFSR ");
2011 
2012 		fm_panic("%sErrors(s)", pr_reason);
2013 	}
2014 
2015 	/*
2016 	 * If we queued an error and we are going to return from the trap and
2017 	 * the error was in user mode or inside of a copy routine, set AST flag
2018 	 * so the queue will be drained before returning to user mode.  The
2019 	 * AST processing will also act on our failure policy.
2020 	 */
2021 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2022 		int pcb_flag = 0;
2023 
2024 		if (t_sfsr & (SFSR_ERRS &
2025 			~(SFSR_BERR | SFSR_TO)))
2026 			pcb_flag |= ASYNC_HWERR;
2027 
2028 		if (t_sfsr & SFSR_BERR)
2029 			pcb_flag |= ASYNC_BERR;
2030 
2031 		if (t_sfsr & SFSR_TO)
2032 			pcb_flag |= ASYNC_BTO;
2033 
2034 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2035 		aston(curthread);
2036 	}
2037 }
2038 
2039 /*ARGSUSED*/
2040 void
2041 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
2042 {
2043 	opl_async_flt_t opl_flt;
2044 	struct async_flt *aflt;
2045 	char pr_reason[MAX_REASON_STRING];
2046 
2047 	/* normalize tl */
2048 	tl = (tl >= 2 ? 1 : 0);
2049 	pr_reason[0] = '\0';
2050 
2051 	bzero(&opl_flt, sizeof (opl_async_flt_t));
2052 	aflt = (struct async_flt *)&opl_flt;
2053 	aflt->flt_id = gethrtime_waitfree();
2054 	aflt->flt_bus_id = getprocessorid();
2055 	aflt->flt_inst = CPU->cpu_id;
2056 	aflt->flt_stat = p_ugesr;
2057 	aflt->flt_pc = (caddr_t)rp->r_pc;
2058 	aflt->flt_class = (uchar_t)CPU_FAULT;
2059 	aflt->flt_tl = tl;
2060 	aflt->flt_priv = (uchar_t)
2061 		(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?  1 : 0));
2062 	aflt->flt_status = OPL_ECC_URGENT_TRAP;
2063 	aflt->flt_panic = 1;
2064 	/*
2065 	 * HW does not set mod/sid in case of urgent error.
2066 	 * So we have to set it here.
2067 	 */
2068 	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
2069 	opl_flt.flt_eid_sid = aflt->flt_inst;
2070 
2071 	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
2072 		opl_flt.flt_type = OPL_CPU_INV_UGESR;
2073 		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
2074 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG,
2075 			(void *)&opl_flt, sizeof (opl_async_flt_t),
2076 			ue_queue, aflt->flt_panic);
2077 	}
2078 
2079 	fm_panic("Urgent Error");
2080 }
2081 
2082 /*
2083  * Initialization error counters resetting.
2084  */
2085 /* ARGSUSED */
2086 static void
2087 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
2088 {
2089 	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
2090 	hdlr->cyh_level = CY_LOW_LEVEL;
2091 	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
2092 
2093 	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
2094 	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
2095 }
2096 
2097 void
2098 cpu_mp_init(void)
2099 {
2100 	cyc_omni_handler_t hdlr;
2101 
2102 	hdlr.cyo_online = opl_ras_online;
2103 	hdlr.cyo_offline = NULL;
2104 	hdlr.cyo_arg = NULL;
2105 	mutex_enter(&cpu_lock);
2106 	(void) cyclic_add_omni(&hdlr);
2107 	mutex_exit(&cpu_lock);
2108 }
2109 
2110 int heaplp_use_stlb = 0;
2111 
2112 void
2113 mmu_init_kernel_pgsz(struct hat *hat)
2114 {
2115 	uint_t tte = page_szc(segkmem_lpsize);
2116 	uchar_t new_cext_primary, new_cext_nucleus;
2117 
2118 	if (heaplp_use_stlb == 0) {
2119 		/* do not reprogram stlb */
2120 		tte = TTE8K;
2121 	}
2122 
2123 	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
2124 	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);
2125 
2126 	hat->sfmmu_cext = new_cext_primary;
2127 	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
2128 		((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
2129 }
2130 
2131 size_t
2132 mmu_get_kernel_lpsize(size_t lpsize)
2133 {
2134 	uint_t tte;
2135 
2136 	if (lpsize == 0) {
2137 		/* no setting for segkmem_lpsize in /etc/system: use default */
2138 		return (MMU_PAGESIZE4M);
2139 	}
2140 
2141 	for (tte = TTE8K; tte <= TTE4M; tte++) {
2142 		if (lpsize == TTEBYTES(tte))
2143 			return (lpsize);
2144 	}
2145 
2146 	return (TTEBYTES(TTE8K));
2147 }
2148 
/*
 * The following functions are unused in the OPL cpu module. They are
 * defined here only to resolve dependencies in the "unix" module.
 * Unused functions that should never be called on OPL are coded with
 * ASSERT(0).
 */
2156 
/*
 * Empty stub: this module takes no action here.  The symbol exists so
 * that references from the "unix" module resolve.
 */
void
cpu_disable_errors(void)
{
}
2160 
/*
 * Stub that is not expected to be called in this module; it asserts
 * if it ever is.
 */
void
cpu_enable_errors(void)
{
	ASSERT(0);
}
2164 
2165 /*ARGSUSED*/
2166 void
2167 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
2168 { ASSERT(0); }
2169 
/*
 * Empty stub: this module takes no action here and ignores cp.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}
2174 
/*
 * Empty stub: this module takes no action here and ignores cp.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}
2179 
/*
 * Empty stub: this module takes no action here and ignores aflt.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{
}
2184 
2185 /*ARGSUSED*/
2186 void
2187 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
2188 { ASSERT(0); }
2189 
2190 /*ARGSUSED*/
2191 void
2192 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
2193 { ASSERT(0); }
2194 
/*
 * Stub that is not expected to be called in this module; it asserts
 * if it ever is.  All arguments are ignored.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	ASSERT(0);
}
2199 
/*
 * Empty stub: this module takes no action here and ignores cp.
 */
/*ARGSUSED*/
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
}
2204 
/*
 * Empty stub: this module takes no action here and ignores cp.
 */
/*ARGSUSED*/
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
}
2209 
2210 /* ARGSUSED */
2211 void
2212 cpu_change_speed(uint64_t divisor, uint64_t arg2)
2213 { ASSERT(0); }
2214 
/*
 * Empty stub: this module takes no action here.  The symbol exists so
 * that references from the "unix" module resolve.
 */
void
cpu_init_cache_scrub(void)
{
}
2218 
2219 /* ARGSUSED */
2220 int
2221 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
2222 {
2223 	if (&plat_get_mem_sid) {
2224 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
2225 	} else {
2226 		return (ENOTSUP);
2227 	}
2228 }
2229 
2230 /* ARGSUSED */
2231 int
2232 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
2233 {
2234 	if (&plat_get_mem_addr) {
2235 		return (plat_get_mem_addr(unum, sid, offset, addrp));
2236 	} else {
2237 		return (ENOTSUP);
2238 	}
2239 }
2240 
2241 /* ARGSUSED */
2242 int
2243 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
2244 {
2245 	if (&plat_get_mem_offset) {
2246 		return (plat_get_mem_offset(flt_addr, offp));
2247 	} else {
2248 		return (ENOTSUP);
2249 	}
2250 }
2251 
2252 /*ARGSUSED*/
2253 void
2254 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2255 { ASSERT(0); }
2256 
2257 /*ARGSUSED*/
2258 void
2259 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
2260 { ASSERT(0); }
2261 
/*
 * Stub that is not expected to be called in this module; it asserts
 * if it ever is.  All arguments are ignored.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
	ASSERT(0);
}
2266 
2267 /*ARGSUSED*/
2268 int
2269 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
2270     errorq_elem_t *eqep, size_t afltoffset)
2271 {
2272 	ASSERT(0);
2273 	return (0);
2274 }
2275 
2276 /*ARGSUSED*/
2277 char *
2278 flt_to_error_type(struct async_flt *aflt)
2279 {
2280 	ASSERT(0);
2281 	return (NULL);
2282 }
2283