xref: /titanic_44/usr/src/uts/sun4v/cpu/niagara2.c (revision 07d06da50d310a325b457d6330165aebab1e0064)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/archsystm.h>
30 #include <sys/machparam.h>
31 #include <sys/machsystm.h>
32 #include <sys/cpu.h>
33 #include <sys/elf_SPARC.h>
34 #include <vm/hat_sfmmu.h>
35 #include <vm/page.h>
36 #include <vm/vm_dep.h>
37 #include <sys/cpuvar.h>
38 #include <sys/async.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/dditypes.h>
42 #include <sys/sunddi.h>
43 #include <sys/cpu_module.h>
44 #include <sys/prom_debug.h>
45 #include <sys/vmsystm.h>
46 #include <sys/prom_plat.h>
47 #include <sys/sysmacros.h>
48 #include <sys/intreg.h>
49 #include <sys/machtrap.h>
50 #include <sys/ontrap.h>
51 #include <sys/ivintr.h>
52 #include <sys/atomic.h>
53 #include <sys/panic.h>
54 #include <sys/dtrace.h>
55 #include <sys/simulate.h>
56 #include <sys/fault.h>
57 #include <sys/niagara2regs.h>
58 #include <sys/hsvc.h>
59 #include <sys/trapstat.h>
60 #include <sys/mutex_impl.h>
61 
/*
 * All 32 low-order bits set.
 * NOTE(review): the consumer of this mask is not visible in this file.
 */
uint_t root_phys_addr_lo_mask = 0xffffffffU;

/*
 * Name of this CPU module, chosen at compile time by which of
 * NIAGARA2_IMPL, VFALLS_IMPL or KT_IMPL is defined.  If none of them is
 * defined, cpu_module_name is not defined here at all.  The name is also
 * used as the module name in the hypervisor services registration record.
 */
#if defined(NIAGARA2_IMPL)
char cpu_module_name[] = "SUNW,UltraSPARC-T2";
#elif defined(VFALLS_IMPL)
char cpu_module_name[] = "SUNW,UltraSPARC-T2+";
#elif defined(KT_IMPL)
char cpu_module_name[] = "SUNW,UltraSPARC-KT";
#endif
70 
/*
 * Hypervisor services information for the NIAGARA2, Victoria Falls
 * and KT CPU modules.
 *
 * cpu_hsvc_available starts out B_TRUE and is cleared by cpu_setup() when
 * hsvc_register() fails.  cpu_sup_minor receives the minor number returned
 * by hsvc_register().
 *
 * Exactly one cpu_hsvc record is compiled in, selected by the same
 * *_IMPL define that selects cpu_module_name.  The initializers supply,
 * in order: revision, a NULL second member, service group, major number,
 * minor number and module name.
 */
static boolean_t cpu_hsvc_available = B_TRUE;
static uint64_t cpu_sup_minor;		/* Supported minor number */
#if defined(NIAGARA2_IMPL)
static hsvc_info_t cpu_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_NIAGARA2_CPU, NIAGARA2_HSVC_MAJOR,
	NIAGARA2_HSVC_MINOR, cpu_module_name
};
#elif defined(VFALLS_IMPL)
static hsvc_info_t cpu_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_VFALLS_CPU, VFALLS_HSVC_MAJOR,
	VFALLS_HSVC_MINOR, cpu_module_name
};
#elif defined(KT_IMPL)
static hsvc_info_t cpu_hsvc = {
	HSVC_REV_1, NULL, HSVC_GROUP_KT_CPU, KT_HSVC_MAJOR,
	KT_HSVC_MINOR, cpu_module_name
};
#endif
93 
94 void
95 cpu_setup(void)
96 {
97 	extern int mmu_exported_pagesize_mask;
98 	extern int cpc_has_overflow_intr;
99 	extern size_t contig_mem_prealloc_base_size;
100 	int status;
101 
102 	/*
103 	 * Negotiate the API version for Niagara2 specific hypervisor
104 	 * services.
105 	 */
106 	status = hsvc_register(&cpu_hsvc, &cpu_sup_minor);
107 	if (status != 0) {
108 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor services "
109 		    "group: 0x%lx major: 0x%lx minor: 0x%lx errno: %d",
110 		    cpu_hsvc.hsvc_modname, cpu_hsvc.hsvc_group,
111 		    cpu_hsvc.hsvc_major, cpu_hsvc.hsvc_minor, status);
112 		cpu_hsvc_available = B_FALSE;
113 	}
114 
115 	/*
116 	 * The setup common to all CPU modules is done in cpu_setup_common
117 	 * routine.
118 	 */
119 	cpu_setup_common(NULL);
120 
121 	/*
122 	 * Initialize the cpu_hwcap_flags for N2 and VF if it is not already
123 	 * set in cpu_setup_common() by the hwcap MD info. Note that this MD
124 	 * info may not be available for N2/VF.
125 	 */
126 	if (cpu_hwcap_flags == 0) {
127 #ifdef KT_IMPL
128 		/*
129 		 * This should not happen since hwcap MD info is always
130 		 * available for KT platforms.
131 		 */
132 		ASSERT(cpu_hwcap_flags != 0);	/* panic in DEBUG mode */
133 		cpu_hwcap_flags |= AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF;
134 #endif /* KT_IMPL */
135 		cpu_hwcap_flags |= AV_SPARC_VIS | AV_SPARC_VIS2 |
136 		    AV_SPARC_ASI_BLK_INIT | AV_SPARC_POPC;
137 	}
138 
139 	cache |= (CACHE_PTAG | CACHE_IOCOHERENT);
140 
141 	if ((mmu_exported_pagesize_mask &
142 	    DEFAULT_SUN4V_MMU_PAGESIZE_MASK) !=
143 	    DEFAULT_SUN4V_MMU_PAGESIZE_MASK)
144 		cmn_err(CE_PANIC, "machine description"
145 		    " does not have required sun4v page sizes"
146 		    " 8K, 64K and 4M: MD mask is 0x%x",
147 		    mmu_exported_pagesize_mask);
148 
149 	/*
150 	 * Niagara2 supports a 48-bit subset of the full 64-bit virtual
151 	 * address space. Virtual addresses between 0x0000800000000000
152 	 * and 0xffff.7fff.ffff.ffff inclusive lie within a "VA Hole"
153 	 * and must never be mapped. In addition, software must not use
154 	 * pages within 4GB of the VA hole as instruction pages to
155 	 * avoid problems with prefetching into the VA hole.
156 	 */
157 	hole_start = (caddr_t)((1ull << (va_bits - 1)) - (1ull << 32));
158 	hole_end = (caddr_t)((0ull - (1ull << (va_bits - 1))) + (1ull << 32));
159 
160 	/*
161 	 * Niagara2 has a performance counter overflow interrupt
162 	 */
163 	cpc_has_overflow_intr = 1;
164 
165 	/*
166 	 * Enable 4M pages for OOB.
167 	 */
168 	max_uheap_lpsize = MMU_PAGESIZE4M;
169 	max_ustack_lpsize = MMU_PAGESIZE4M;
170 	max_privmap_lpsize = MMU_PAGESIZE4M;
171 
172 #ifdef SUN4V_CONTIG_MEM_PREALLOC_SIZE_MB
173 	/*
174 	 * Use CPU Makefile specific compile time define (if exists)
175 	 * to add to the contig preallocation size.
176 	 */
177 	contig_mem_prealloc_base_size = MB(SUN4V_CONTIG_MEM_PREALLOC_SIZE_MB);
178 #endif
179 }
180 
181 /*
182  * Set the magic constants of the implementation.
183  */
184 void
185 cpu_fiximp(struct cpu_node *cpunode)
186 {
187 	/*
188 	 * The Cache node is optional in MD. Therefore in case "Cache"
189 	 * node does not exists in MD, set the default L2 cache associativity,
190 	 * size, linesize.
191 	 */
192 	if (cpunode->ecache_size == 0)
193 		cpunode->ecache_size = L2CACHE_SIZE;
194 	if (cpunode->ecache_linesize == 0)
195 		cpunode->ecache_linesize = L2CACHE_LINESIZE;
196 	if (cpunode->ecache_associativity == 0)
197 		cpunode->ecache_associativity = L2CACHE_ASSOCIATIVITY;
198 }
199 
200 void
201 cpu_map_exec_units(struct cpu *cp)
202 {
203 	ASSERT(MUTEX_HELD(&cpu_lock));
204 
205 	/*
206 	 * The cpu_ipipe and cpu_fpu fields are initialized based on
207 	 * the execution unit sharing information from the MD. They
208 	 * default to the CPU id in the absence of such information.
209 	 */
210 	cp->cpu_m.cpu_ipipe = cpunodes[cp->cpu_id].exec_unit_mapping;
211 	if (cp->cpu_m.cpu_ipipe == NO_EU_MAPPING_FOUND)
212 		cp->cpu_m.cpu_ipipe = (id_t)(cp->cpu_id);
213 
214 	cp->cpu_m.cpu_fpu = cpunodes[cp->cpu_id].fpu_mapping;
215 	if (cp->cpu_m.cpu_fpu == NO_EU_MAPPING_FOUND)
216 		cp->cpu_m.cpu_fpu = (id_t)(cp->cpu_id);
217 
218 	/*
219 	 * Niagara 2 defines the core to be at the FPU level
220 	 */
221 	cp->cpu_m.cpu_core = cp->cpu_m.cpu_fpu;
222 
223 	/*
224 	 * The cpu_chip field is initialized based on the information
225 	 * in the MD and assume that all cpus within a chip
226 	 * share the same L2 cache. If no such info is available, we
227 	 * set the cpu to belong to the defacto chip 0.
228 	 */
229 	cp->cpu_m.cpu_mpipe = cpunodes[cp->cpu_id].l2_cache_mapping;
230 	if (cp->cpu_m.cpu_mpipe == NO_L2_CACHE_MAPPING_FOUND)
231 		cp->cpu_m.cpu_mpipe = CPU_L2_CACHEID_INVALID;
232 
233 	cp->cpu_m.cpu_chip = cpunodes[cp->cpu_id].l2_cache_mapping;
234 	if (cp->cpu_m.cpu_chip == NO_L2_CACHE_MAPPING_FOUND)
235 		cp->cpu_m.cpu_chip = CPU_CHIPID_INVALID;
236 }
237 
238 static int cpucnt;
239 
240 void
241 cpu_init_private(struct cpu *cp)
242 {
243 	extern void niagara_kstat_init(void);
244 
245 	ASSERT(MUTEX_HELD(&cpu_lock));
246 
247 	cpu_map_exec_units(cp);
248 
249 	if ((cpucnt++ == 0) && (cpu_hsvc_available == B_TRUE))
250 		(void) niagara_kstat_init();
251 
252 	mutex_delay = rdccr_delay;
253 }
254 
255 /*ARGSUSED*/
256 void
257 cpu_uninit_private(struct cpu *cp)
258 {
259 	extern void niagara_kstat_fini(void);
260 
261 	ASSERT(MUTEX_HELD(&cpu_lock));
262 	if ((--cpucnt == 0) && (cpu_hsvc_available == B_TRUE))
263 		(void) niagara_kstat_fini();
264 }
265 
/*
 * On Niagara2, any flush will cause all preceding stores to be
 * synchronized wrt the i$, regardless of address or ASI.  In fact,
 * the address is ignored, so we always flush address 0.
 *
 * The addr argument is therefore accepted but deliberately unused.
 */
/*ARGSUSED*/
void
dtrace_flush_sec(uintptr_t addr)
{
	doflush(0);
}
277 
278 /*
279  * Trapstat support for Niagara2 processor
280  * The Niagara2 provides HWTW support for TSB lookup and with HWTW
281  * enabled no TSB hit information will be available. Therefore setting
282  * the time spent in TLB miss handler for TSB hits to 0.
283  */
284 int
285 cpu_trapstat_conf(int cmd)
286 {
287 	int status = 0;
288 
289 	switch (cmd) {
290 	case CPU_TSTATCONF_INIT:
291 	case CPU_TSTATCONF_FINI:
292 	case CPU_TSTATCONF_ENABLE:
293 	case CPU_TSTATCONF_DISABLE:
294 		break;
295 	default:
296 		status = EINVAL;
297 		break;
298 	}
299 	return (status);
300 }
301 
302 void
303 cpu_trapstat_data(void *buf, uint_t tstat_pgszs)
304 {
305 	tstat_pgszdata_t	*tstatp = (tstat_pgszdata_t *)buf;
306 	int	i;
307 
308 	for (i = 0; i < tstat_pgszs; i++, tstatp++) {
309 		tstatp->tpgsz_kernel.tmode_itlb.ttlb_tlb.tmiss_count = 0;
310 		tstatp->tpgsz_kernel.tmode_itlb.ttlb_tlb.tmiss_time = 0;
311 		tstatp->tpgsz_user.tmode_itlb.ttlb_tlb.tmiss_count = 0;
312 		tstatp->tpgsz_user.tmode_itlb.ttlb_tlb.tmiss_time = 0;
313 		tstatp->tpgsz_kernel.tmode_dtlb.ttlb_tlb.tmiss_count = 0;
314 		tstatp->tpgsz_kernel.tmode_dtlb.ttlb_tlb.tmiss_time = 0;
315 		tstatp->tpgsz_user.tmode_dtlb.ttlb_tlb.tmiss_count = 0;
316 		tstatp->tpgsz_user.tmode_dtlb.ttlb_tlb.tmiss_time = 0;
317 	}
318 }
319 
/*
 * Page coloring support for hashed cache index mode
 */

/*
 * Node id bits from machine description (MD).  Node id distinguishes
 * local versus remote memory. Because of MPO, page allocation does
 * not cross node boundaries. Therefore, remove the node id bits from
 * the color, since they are fixed. Either bit 30, or 31:30 in
 * Victoria Falls processors.
 * The number of node id bits is always 0 in Niagara2.
 *
 * One n2color_t per page size describes where the node id field sits
 * inside that page size's hashed color: nnbits/nnmask give its width,
 * lobits/lomask describe the color bits below it.
 */
typedef struct n2color {
	uchar_t nnbits;	/* number of node id bits */
	uchar_t nnmask; /* mask for node id bits */
	uchar_t	lomask;	/* mask for bits below node id */
	uchar_t lobits;	/* number of bits below node id */
} n2color_t;

/* Indexed by page size code; filled in by page_coloring_init_cpu(). */
n2color_t n2color[MMU_PAGE_SIZES];
/*
 * Number of hashed color bits for each page size code, before any node
 * id bits are removed (7 for the two smallest sizes, then 6, then 5).
 */
static uchar_t nhbits[] = {7, 7, 6, 5, 5, 5};
341 
342 /*
343  * Remove node id bits from color bits 32:28.
344  * This will reduce the number of colors.
345  * No change if number of node bits is zero.
346  */
347 static inline uint_t
348 n2_hash2color(uint_t color, uchar_t szc)
349 {
350 	n2color_t m = n2color[szc];
351 
352 	if (m.nnbits > 0) {
353 		color = ((color >> m.nnbits) & ~m.lomask) | (color & m.lomask);
354 		ASSERT((color & ~(hw_page_array[szc].hp_colors - 1)) == 0);
355 	}
356 
357 	return (color);
358 }
359 
360 /*
361  * Restore node id bits into page color.
362  * This will increase the number of colors to match N2.
363  * No change if number of node bits is zero.
364  */
365 static inline uint_t
366 n2_color2hash(uint_t color, uchar_t szc, uint_t node)
367 {
368 	n2color_t m = n2color[szc];
369 
370 	if (m.nnbits > 0) {
371 		color = ((color & ~m.lomask) << m.nnbits) | (color & m.lomask);
372 		color |= (node & m.nnmask) << m.lobits;
373 	}
374 
375 	return (color);
376 }
377 
378 /* NI2 L2$ index is pa[32:28]^pa[17:13].pa[19:18]^pa[12:11].pa[10:6] */
379 
380 /*
381  * iterator NULL means pfn is VA, do not adjust ra_to_pa
382  * iterator (-1) means pfn is RA, need to convert to PA
383  * iterator non-null means pfn is RA, use ra_to_pa
384  */
385 uint_t
386 page_pfn_2_color_cpu(pfn_t pfn, uchar_t szc, void *cookie)
387 {
388 	mem_node_iterator_t *it = cookie;
389 	uint_t color;
390 
391 	ASSERT(szc <= TTE256M);
392 
393 	if (it == ((mem_node_iterator_t *)(-1))) {
394 		pfn = plat_rapfn_to_papfn(pfn);
395 	} else if (it != NULL) {
396 		ASSERT(pfn >= it->mi_mblock_base && pfn <= it->mi_mblock_end);
397 		pfn = pfn + it->mi_ra_to_pa;
398 	}
399 	pfn = PFN_BASE(pfn, szc);
400 	color = ((pfn >> 15) ^ pfn) & 0x1f;
401 	if (szc < TTE4M) {
402 		/* 19:18 */
403 		color = (color << 2) | ((pfn >> 5) & 0x3);
404 		if (szc > TTE64K)
405 			color >>= 1;    /* 19 */
406 	}
407 	return (n2_hash2color(color, szc));
408 }
409 
410 static uint_t
411 page_papfn_2_color_cpu(pfn_t papfn, uchar_t szc)
412 {
413 	uint_t color;
414 
415 	ASSERT(szc <= TTE256M);
416 
417 	papfn = PFN_BASE(papfn, szc);
418 	color = ((papfn >> 15) ^ papfn) & 0x1f;
419 	if (szc < TTE4M) {
420 		/* 19:18 */
421 		color = (color << 2) | ((papfn >> 5) & 0x3);
422 		if (szc > TTE64K)
423 			color >>= 1;    /* 19 */
424 	}
425 	return (color);
426 }
427 
/*
 * The per-page-size tables in this file (e.g. nhbits[] with six entries)
 * are indexed by page size codes up to TTE256M, so the build is stopped
 * if TTE256M is ever not 5.
 */
#if TTE256M != 5
#error TTE256M is not 5
#endif
431 
432 uint_t
433 page_get_nsz_color_mask_cpu(uchar_t szc, uint_t mask)
434 {
435 	static uint_t ni2_color_masks[5] = {0x63, 0x1e, 0x3e, 0x1f, 0x1f};
436 	ASSERT(szc < TTE256M);
437 	mask = n2_color2hash(mask, szc, 0);
438 	mask &= ni2_color_masks[szc];
439 	if (szc == TTE64K || szc == TTE512K)
440 		mask >>= 1;
441 	return (n2_hash2color(mask, szc + 1));
442 }
443 
444 uint_t
445 page_get_nsz_color_cpu(uchar_t szc, uint_t color)
446 {
447 	ASSERT(szc < TTE256M);
448 	color = n2_color2hash(color, szc, 0);
449 	if (szc == TTE64K || szc == TTE512K)
450 		color >>= 1;
451 	return (n2_hash2color(color, szc + 1));
452 }
453 
454 uint_t
455 page_get_color_shift_cpu(uchar_t szc, uchar_t nszc)
456 {
457 	uint_t s;
458 	ASSERT(nszc >= szc);
459 	ASSERT(nszc <= TTE256M);
460 
461 	s = nhbits[szc] - n2color[szc].nnbits;
462 	s -= nhbits[nszc] - n2color[nszc].nnbits;
463 
464 	return (s);
465 }
466 
467 uint_t
468 page_convert_color_cpu(uint_t ncolor, uchar_t szc, uchar_t nszc)
469 {
470 	uint_t color;
471 
472 	ASSERT(nszc > szc);
473 	ASSERT(nszc <= TTE256M);
474 	ncolor = n2_color2hash(ncolor, nszc, 0);
475 	color = ncolor << (nhbits[szc] - nhbits[nszc]);
476 	color = n2_hash2color(color, szc);
477 	return (color);
478 }
479 
/*
 * Extract the mnode number from a PA pfn.
 * NOTE: this macro relies on a mem_node_iterator_t pointer named "it"
 * being in scope at the point of use.
 */
#define	PAPFN_2_MNODE(pfn) \
	(((pfn) & it->mi_mnode_pfn_mask) >> it->mi_mnode_pfn_shift)
482 
/*
 * Find a following pfn of page size szc that belongs to the iterator's
 * mnode and whose color matches "color" in the bits selected by ceq_mask.
 *
 * pfn is given and returned as an RA pfn; all color arithmetic is done on
 * the PA pfn (pfn + it->mi_ra_to_pa).  cookie must point to a valid
 * mem_node_iterator_t; it is dereferenced unconditionally.  color_mask is
 * not used.
 *
 * On the first call after the iterator was (re)initialized (mi_init set),
 * the hashed forms of ceq_mask and color are cached in the iterator; on
 * later calls the cached values are used and only cross-checked by ASSERT.
 *
 * If the candidate lies beyond the current memblock, the iterator is moved
 * on with plat_mem_node_iterator_init() and the search restarts from the
 * pfn it returns.  Returns (pfn_t)-1 when that call reports no more
 * memory.
 *
 * NOTE(review): ADD_MASKED is defined elsewhere; from its use here it
 * appears to advance a pfn by a step while holding the bits in the given
 * mask fixed - confirm against its definition.
 */
/*ARGSUSED*/
pfn_t
page_next_pfn_for_color_cpu(pfn_t pfn, uchar_t szc, uint_t color,
    uint_t ceq_mask, uint_t color_mask, void *cookie)
{
	mem_node_iterator_t *it = cookie;
	pfn_t pstep = PNUM_SIZE(szc);
	pfn_t npfn, pfn_ceq_mask, pfn_color;
	pfn_t tmpmask, mask = (pfn_t)-1;
	uint_t pfnmn;

	ASSERT((color & ~ceq_mask) == 0);
	ASSERT(pfn >= it->mi_mblock_base && pfn <= it->mi_mblock_end);

	/* convert RA to PA for accurate color calculation */
	if (it->mi_init) {
		/* first call after it, so cache these values */
		it->mi_hash_ceq_mask =
		    n2_color2hash(ceq_mask, szc, it->mi_mnode_mask);
		it->mi_hash_color =
		    n2_color2hash(color, szc, it->mi_mnode);
		it->mi_init = 0;
	} else {
		ASSERT(it->mi_hash_ceq_mask ==
		    n2_color2hash(ceq_mask, szc, it->mi_mnode_mask));
		ASSERT(it->mi_hash_color ==
		    n2_color2hash(color, szc, it->mi_mnode));
	}
	/* from here on ceq_mask and color are in hashed (node id) form */
	ceq_mask = it->mi_hash_ceq_mask;
	color = it->mi_hash_color;
	pfn += it->mi_ra_to_pa;

	/* restart here when we switch memblocks */
next_mem_block:
	pfnmn = PAPFN_2_MNODE(pfn);
	if ((((page_papfn_2_color_cpu(pfn, szc) ^ color) & ceq_mask) == 0) &&
	    (pfnmn == it->mi_mnode)) {

		/* we start from the page with correct color and mnode */
		if (szc >= TTE512K) {
			if (szc >= TTE4M) {
				/* page color is PA[32:28] */
				pfn_ceq_mask = ceq_mask << 15;
			} else {
				/* page color is PA[32:28].PA[19:19] */
				pfn_ceq_mask = ((ceq_mask & 1) << 6) |
				    ((ceq_mask >> 1) << 15);
			}
			/*
			 * Preserve mnode bits in case they are not part of the
			 * color mask (eg., 8GB interleave, mnode bits 34:33).
			 */
			pfn_ceq_mask |= it->mi_mnode_pfn_mask;
			npfn = ADD_MASKED(pfn, pstep, pfn_ceq_mask, mask);
			goto done;
		} else {
			/*
			 * We deal 64K or 8K page. Check if we could the
			 * satisfy the request without changing PA[32:28]
			 */
			pfn_ceq_mask = ((ceq_mask & 3) << 5) | (ceq_mask >> 2);
			pfn_ceq_mask |= it->mi_mnode_pfn_mask;
			npfn = ADD_MASKED(pfn, pstep, pfn_ceq_mask, mask);

			/* pfn bits 19:15 (PA[32:28]) unchanged: done */
			if ((((npfn ^ pfn) >> 15) & 0x1f) == 0)
				goto done;

			/*
			 * for next pfn we have to change bits PA[32:28]
			 * set PA[63:28] and PA[19:18] of the next pfn
			 */
			npfn = (pfn >> 15) << 15;
			npfn |= (ceq_mask & color & 3) << 5;
			pfn_ceq_mask = (szc == TTE8K) ? 0 :
			    (ceq_mask & 0x1c) << 13;
			pfn_ceq_mask |= it->mi_mnode_pfn_mask;
			npfn = ADD_MASKED(npfn, (1 << 15), pfn_ceq_mask, mask);

			/*
			 * set bits PA[17:13] to match the color
			 */
			npfn |= ((npfn >> 15) ^ (color >> 2)) & (ceq_mask >> 2);
			goto done;
		}
	}

	/*
	 * we start from the page with incorrect color - rare case
	 */
	if (szc >= TTE512K) {
		if (szc >= TTE4M) {
			/* page color is in bits PA[32:28] */
			npfn = ((pfn >> 20) << 20) | (color << 15);
			pfn_ceq_mask = (ceq_mask << 15) | 0x7fff;
		} else {
			/* try get the right color by changing bit PA[19:19] */
			npfn = pfn + pstep;
			pfnmn = PAPFN_2_MNODE(npfn);
			if ((((page_papfn_2_color_cpu(npfn, szc) ^ color) &
			    ceq_mask) == 0) && (pfnmn == it->mi_mnode))
				goto done;

			/* page color is PA[32:28].PA[19:19] */
			pfn_ceq_mask = ((ceq_mask & 1) << 6) |
			    ((ceq_mask >> 1) << 15) | (0xff << 7);
			pfn_color = ((color & 1) << 6) | ((color >> 1) << 15);
			npfn = ((pfn >> 20) << 20) | pfn_color;
		}

		/* Fix mnode if necessary */
		if ((pfnmn = PAPFN_2_MNODE(npfn)) != it->mi_mnode)
			npfn += ((it->mi_mnode - pfnmn) & it->mi_mnode_mask) <<
			    it->mi_mnode_pfn_shift;

		/*
		 * Preserve mnode bits in case they are not part of the color
		 * mask eg 8GB interleave, mnode bits 34:33).
		 */
		pfn_ceq_mask |= it->mi_mnode_pfn_mask;
		/* the candidate may be at or below pfn; step past it */
		while (npfn <= pfn) {
			npfn = ADD_MASKED(npfn, pstep, pfn_ceq_mask, mask);
		}
		goto done;
	}

	/*
	 *  We deal 64K or 8K page of incorrect color.
	 * Try correcting color without changing PA[32:28]
	 */
	pfn_ceq_mask = ((ceq_mask & 3) << 5) | (ceq_mask >> 2);
	pfn_color = ((color & 3) << 5) | (color >> 2);
	if (pfnmn == it->mi_mnode) {
		/* clear pfn bits 6:0, then set them to give the color */
		npfn = (pfn & ~(pfn_t)0x7f);
		npfn |= (((pfn >> 15) & 0x1f) ^ pfn_color) & pfn_ceq_mask;
		/* a 64K page spans 8 base pages: clear pfn bits 2:0 */
		npfn = (szc == TTE64K) ? (npfn & ~(pfn_t)0x7) : npfn;

		if (((page_papfn_2_color_cpu(npfn, szc) ^ color) &
		    ceq_mask) == 0) {
			/* the color is fixed - find the next page */
			pfn_ceq_mask |= it->mi_mnode_pfn_mask;
			while (npfn <= pfn) {
				npfn = ADD_MASKED(npfn, pstep, pfn_ceq_mask,
				    mask);
			}
			if ((((npfn ^ pfn) >> 15) & 0x1f) == 0)
				goto done;
		}
	}

	/* to fix the color need to touch PA[32:28] */
	npfn = (szc == TTE8K) ? ((pfn >> 15) << 15) :
	    (((pfn >> 18) << 18) | ((color & 0x1c) << 13));

	/* fix mnode if input pfn is in the wrong mnode. */
	if ((pfnmn = PAPFN_2_MNODE(npfn)) != it->mi_mnode) {
		npfn += ((it->mi_mnode - pfnmn) & it->mi_mnode_mask) <<
		    it->mi_mnode_pfn_shift;
	}

	tmpmask = (szc == TTE8K) ? 0 : (ceq_mask & 0x1c) << 13;
	tmpmask |= it->mi_mnode_pfn_mask;

	while (npfn <= pfn) {
		npfn = ADD_MASKED(npfn, (1 << 15), tmpmask, mask);
	}

	/* set bits PA[19:13] to match the color */
	npfn |= (((npfn >> 15) & 0x1f) ^ pfn_color) & pfn_ceq_mask;
	npfn = (szc == TTE64K) ? (npfn & ~(pfn_t)0x7) : npfn;

done:
	/* whichever path got here, npfn must have the color and mnode */
	ASSERT(((page_papfn_2_color_cpu(npfn, szc) ^ color) & ceq_mask) == 0);
	ASSERT(PAPFN_2_MNODE(npfn) == it->mi_mnode);

	/* PA to RA */
	npfn -= it->mi_ra_to_pa;

	/* check for possible memblock switch */
	if (npfn > it->mi_mblock_end) {
		pfn = plat_mem_node_iterator_init(npfn, it->mi_mnode, szc, it,
		    0);
		if (pfn == (pfn_t)-1)
			return (pfn);
		ASSERT(pfn >= it->mi_mblock_base && pfn <= it->mi_mblock_end);
		/* back to PA using the new memblock's offset, then retry */
		pfn += it->mi_ra_to_pa;
		goto next_mem_block;
	}

	return (npfn);
}
673 
674 /*
675  * init page coloring
676  * VF encodes node_id for an L-group in either bit 30 or 31:30,
677  * which effectively reduces the number of colors available per mnode.
678  */
679 void
680 page_coloring_init_cpu()
681 {
682 	int i;
683 	uchar_t id;
684 	uchar_t lo;
685 	uchar_t hi;
686 	n2color_t m;
687 	mem_node_iterator_t it;
688 	static uchar_t idmask[] = {0, 0x7, 0x1f, 0x1f, 0x1f, 0x1f};
689 
690 	for (i = 0; i < max_mem_nodes; i++) {
691 		memset(&it, 0, sizeof (it));
692 		if (plat_mem_node_iterator_init(0, i, 0, &it, 1) != (pfn_t)-1)
693 			break;
694 	}
695 	ASSERT(i < max_mem_nodes);
696 	for (i = 0; i < mmu_page_sizes; i++) {
697 		(void) memset(&m, 0, sizeof (m));
698 		id = it.mi_mnode_pfn_mask >> 15;	/* node id mask */
699 		id &= idmask[i];
700 		lo = lowbit(id);
701 		if (lo > 0) {
702 			hi = highbit(id);
703 			m.nnbits = hi - lo + 1;
704 			m.nnmask = (1 << m.nnbits) - 1;
705 			lo += nhbits[i] - 5;
706 			m.lomask = (1 << (lo - 1)) - 1;
707 			m.lobits = lo - 1;
708 		}
709 		hw_page_array[i].hp_colors = 1 << (nhbits[i] - m.nnbits);
710 		n2color[i] = m;
711 	}
712 }
713 
714 /*
715  * group colorequiv colors on N2 by low order bits of the color first
716  */
717 void
718 page_set_colorequiv_arr_cpu(void)
719 {
720 	static uint_t nequiv_shades_log2[MMU_PAGE_SIZES] = {2, 5, 0, 0, 0, 0};
721 
722 	nequiv_shades_log2[1] -= n2color[1].nnbits;
723 	if (colorequiv > 1) {
724 		int i;
725 		uint_t sv_a = lowbit(colorequiv) - 1;
726 
727 		if (sv_a > 15)
728 			sv_a = 15;
729 
730 		for (i = 0; i < MMU_PAGE_SIZES; i++) {
731 			uint_t colors;
732 			uint_t a = sv_a;
733 
734 			if ((colors = hw_page_array[i].hp_colors) <= 1)
735 				continue;
736 			while ((colors >> a) == 0)
737 				a--;
738 			if (a > (colorequivszc[i] & 0xf) +
739 			    (colorequivszc[i] >> 4)) {
740 				if (a <= nequiv_shades_log2[i]) {
741 					colorequivszc[i] = (uchar_t)a;
742 				} else {
743 					colorequivszc[i] =
744 					    ((a - nequiv_shades_log2[i]) << 4) |
745 					    nequiv_shades_log2[i];
746 				}
747 			}
748 		}
749 	}
750 }
751