xref: /illumos-gate/usr/src/uts/sun4v/os/fillsysinfo.c (revision aff4bce51ecc47df7e5a6351b7cee6bc20408c63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/errno.h>
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/cpu.h>
30 #include <sys/cpuvar.h>
31 #include <sys/clock.h>
32 #include <sys/promif.h>
33 #include <sys/promimpl.h>
34 #include <sys/systm.h>
35 #include <sys/machsystm.h>
36 #include <sys/debug.h>
37 #include <sys/sunddi.h>
38 #include <sys/modctl.h>
39 #include <sys/cpu_module.h>
40 #include <sys/kobj.h>
41 #include <sys/cmp.h>
42 #include <sys/async.h>
43 #include <vm/page.h>
44 #include <vm/vm_dep.h>
45 #include <vm/hat_sfmmu.h>
46 #include <sys/sysmacros.h>
47 #include <sys/mach_descrip.h>
48 #include <sys/mdesc.h>
49 #include <sys/archsystm.h>
50 #include <sys/error.h>
51 #include <sys/mmu.h>
52 #include <sys/bitmap.h>
53 #include <sys/intreg.h>
54 
/* Per-CPU descriptors, indexed by cpuid; entries are filled by fill_cpu(). */
struct cpu_node cpunodes[NCPU];

/*
 * Queue sizes (in entries) and the guest CPU limit.  All five are
 * assigned in get_q_sizes().
 */
uint64_t cpu_q_entries;
uint64_t dev_q_entries;
uint64_t cpu_rq_entries;
uint64_t cpu_nrq_entries;
uint64_t ncpu_guest_max;

void fill_cpu(md_t *, mde_cookie_t);

/* Helpers private to this file; each reads properties from the MD. */
static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t);
static uint64_t get_mmu_tsbs(md_t *, mde_cookie_t);
static uint64_t	get_mmu_shcontexts(md_t *, mde_cookie_t);
static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t);
static int check_mmu_pgsz_search(md_t *, mde_cookie_t);
static char *construct_isalist(md_t *, mde_cookie_t, char **);
static void init_md_broken(md_t *, mde_cookie_t *);
static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *,
    uint64_t *);
static void get_hwcaps(md_t *, mde_cookie_t);
static void get_weakest_mem_model(md_t *, mde_cookie_t);
static void get_q_sizes(md_t *, mde_cookie_t);
static void get_va_bits(md_t *, mde_cookie_t);
static size_t get_ra_limit(md_t *, mde_cookie_t);
static int get_l2_cache_node_count(md_t *);
static unsigned long names2bits(char *tokens, size_t tokenslen,
    char *bit_formatter, char *warning);

uint64_t	system_clock_freq;
uint_t		niommu_tsbs = 0;

/*
 * Number of level-2 cache nodes in the MD.  Set in cpu_setup_common()
 * and reported by l2_cache_node_count().
 */
static int n_l2_caches = 0;

/* prevent compilation with VAC defined */
#ifdef VAC
#error "The sun4v architecture does not support VAC"
#endif

/* With no VAC, the "VAC" size and shift are simply those of an MMU page. */
#define	S_VAC_SIZE	MMU_PAGESIZE
#define	S_VAC_SHIFT	MMU_PAGESHIFT

int		vac_size = S_VAC_SIZE;
uint_t		vac_mask = MMU_PAGEMASK & (S_VAC_SIZE - 1);
int		vac_shift = S_VAC_SHIFT;
uintptr_t	shm_alignment = S_VAC_SIZE;
100 
/*
 * Intentionally a no-op: the body is empty, so calling this routine
 * has no effect.
 */
void
map_wellknown_devices()
{
}
105 
106 void
107 fill_cpu(md_t *mdp, mde_cookie_t cpuc)
108 {
109 	struct cpu_node *cpunode;
110 	uint64_t cpuid;
111 	uint64_t clk_freq;
112 	char *namebuf;
113 	char *namebufp;
114 	int namelen;
115 	uint64_t associativity = 0, linesize = 0, size = 0;
116 
117 	if (md_get_prop_val(mdp, cpuc, "id", &cpuid)) {
118 		return;
119 	}
120 
121 	/* All out-of-range cpus will be stopped later. */
122 	if (cpuid >= NCPU) {
123 		cmn_err(CE_CONT, "fill_cpu: out of range cpuid %ld - "
124 		    "cpu excluded from configuration\n", cpuid);
125 
126 		return;
127 	}
128 
129 	cpunode = &cpunodes[cpuid];
130 	cpunode->cpuid = (int)cpuid;
131 	cpunode->device_id = cpuid;
132 
133 	if (sizeof (cpunode->fru_fmri) > strlen(CPU_FRU_FMRI))
134 		(void) strcpy(cpunode->fru_fmri, CPU_FRU_FMRI);
135 
136 	if (md_get_prop_data(mdp, cpuc,
137 	    "compatible", (uint8_t **)&namebuf, &namelen)) {
138 		cmn_err(CE_PANIC, "fill_cpu: Cannot read compatible "
139 		    "property");
140 	}
141 	namebufp = namebuf;
142 	if (strncmp(namebufp, "SUNW,", 5) == 0)
143 		namebufp += 5;
144 	if (strlen(namebufp) > sizeof (cpunode->name))
145 		cmn_err(CE_PANIC, "Compatible property too big to "
146 		    "fit into the cpunode name buffer");
147 	(void) strcpy(cpunode->name, namebufp);
148 
149 	if (md_get_prop_val(mdp, cpuc,
150 	    "clock-frequency", &clk_freq)) {
151 			clk_freq = 0;
152 	}
153 	cpunode->clock_freq = clk_freq;
154 
155 	ASSERT(cpunode->clock_freq != 0);
156 	/*
157 	 * Compute scaling factor based on rate of %tick. This is used
158 	 * to convert from ticks derived from %tick to nanoseconds. See
159 	 * comment in sun4u/sys/clock.h for details.
160 	 */
161 	cpunode->tick_nsec_scale = (uint_t)(((uint64_t)NANOSEC <<
162 	    (32 - TICK_NSEC_SHIFT)) / cpunode->clock_freq);
163 
164 	/*
165 	 * The nodeid is not used in sun4v at all. Setting it
166 	 * to positive value to make starting of slave CPUs
167 	 * code happy.
168 	 */
169 	cpunode->nodeid = cpuid + 1;
170 
171 	/*
172 	 * Obtain the L2 cache information from MD.
173 	 * If "Cache" node exists, then set L2 cache properties
174 	 * as read from MD.
175 	 * If node does not exists, then set the L2 cache properties
176 	 * in individual CPU module.
177 	 */
178 	if ((!get_l2_cache_info(mdp, cpuc,
179 	    &associativity, &size, &linesize)) ||
180 	    associativity == 0 || size == 0 || linesize == 0) {
181 		cpu_fiximp(cpunode);
182 	} else {
183 		/*
184 		 * Do not expect L2 cache properties to be bigger
185 		 * than 32-bit quantity.
186 		 */
187 		cpunode->ecache_associativity = (int)associativity;
188 		cpunode->ecache_size = (int)size;
189 		cpunode->ecache_linesize = (int)linesize;
190 	}
191 
192 	cpunode->ecache_setsize =
193 	    cpunode->ecache_size / cpunode->ecache_associativity;
194 
195 	/*
196 	 * Initialize the mapping for exec unit, chip and core.
197 	 */
198 	cpunode->exec_unit_mapping = NO_EU_MAPPING_FOUND;
199 	cpunode->l2_cache_mapping = NO_MAPPING_FOUND;
200 	cpunode->core_mapping = NO_CORE_MAPPING_FOUND;
201 
202 	if (ecache_setsize == 0)
203 		ecache_setsize = cpunode->ecache_setsize;
204 	if (ecache_alignsize == 0)
205 		ecache_alignsize = cpunode->ecache_linesize;
206 
207 }
208 
209 void
210 empty_cpu(int cpuid)
211 {
212 	bzero(&cpunodes[cpuid], sizeof (struct cpu_node));
213 }
214 
215 /*
216  * Use L2 cache node to derive the chip mapping.
217  */
218 void
219 setup_chip_mappings(md_t *mdp)
220 {
221 	int ncache, ncpu;
222 	mde_cookie_t *node, *cachelist;
223 	int i, j;
224 	processorid_t cpuid;
225 	int idx = 0;
226 
227 	ncache = md_alloc_scan_dag(mdp, md_root_node(mdp), "cache",
228 	    "fwd", &cachelist);
229 
230 	/*
231 	 * The "cache" node is optional in MD, therefore ncaches can be 0.
232 	 */
233 	if (ncache < 1) {
234 		return;
235 	}
236 
237 	for (i = 0; i < ncache; i++) {
238 		uint64_t cache_level;
239 		uint64_t lcpuid;
240 
241 		if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
242 			continue;
243 
244 		if (cache_level != 2)
245 			continue;
246 
247 		/*
248 		 * Found a l2 cache node. Find out the cpu nodes it
249 		 * points to.
250 		 */
251 		ncpu = md_alloc_scan_dag(mdp, cachelist[i], "cpu",
252 		    "back", &node);
253 
254 		if (ncpu < 1)
255 			continue;
256 
257 		for (j = 0; j < ncpu; j++) {
258 			if (md_get_prop_val(mdp, node[j], "id", &lcpuid))
259 				continue;
260 			if (lcpuid >= NCPU)
261 				continue;
262 			cpuid = (processorid_t)lcpuid;
263 			cpunodes[cpuid].l2_cache_mapping = idx;
264 		}
265 		md_free_scan_dag(mdp, &node);
266 
267 		idx++;
268 	}
269 
270 	md_free_scan_dag(mdp, &cachelist);
271 }
272 
273 void
274 setup_exec_unit_mappings(md_t *mdp)
275 {
276 	int num, num_eunits;
277 	mde_cookie_t cpus_node;
278 	mde_cookie_t *node, *eunit;
279 	int idx, i, j;
280 	processorid_t cpuid;
281 	char *eunit_name = broken_md_flag ? "exec_unit" : "exec-unit";
282 	enum eu_type { INTEGER, FPU } etype;
283 
284 	/*
285 	 * Find the cpu integer exec units - and
286 	 * setup the mappings appropriately.
287 	 */
288 	num = md_alloc_scan_dag(mdp, md_root_node(mdp), "cpus", "fwd", &node);
289 	if (num < 1)
290 		cmn_err(CE_PANIC, "No cpus node in machine description");
291 	if (num > 1)
292 		cmn_err(CE_PANIC, "More than 1 cpus node in machine"
293 		    " description");
294 
295 	cpus_node = node[0];
296 	md_free_scan_dag(mdp, &node);
297 
298 	num_eunits = md_alloc_scan_dag(mdp, cpus_node, eunit_name,
299 	    "fwd", &eunit);
300 	if (num_eunits > 0) {
301 		char *int_str = broken_md_flag ? "int" : "integer";
302 		char *fpu_str = "fp";
303 
304 		/* Spin through and find all the integer exec units */
305 		for (i = 0; i < num_eunits; i++) {
306 			char *p;
307 			char *val;
308 			int vallen;
309 			uint64_t lcpuid;
310 
311 			/* ignore nodes with no type */
312 			if (md_get_prop_data(mdp, eunit[i], "type",
313 			    (uint8_t **)&val, &vallen))
314 				continue;
315 
316 			for (p = val; *p != '\0'; p += strlen(p) + 1) {
317 				if (strcmp(p, int_str) == 0) {
318 					etype = INTEGER;
319 					goto found;
320 				}
321 				if (strcmp(p, fpu_str) == 0) {
322 					etype = FPU;
323 					goto found;
324 				}
325 			}
326 
327 			continue;
328 found:
329 			idx = NCPU + i;
330 			/*
331 			 * find the cpus attached to this EU and
332 			 * update their mapping indices
333 			 */
334 			num = md_alloc_scan_dag(mdp, eunit[i], "cpu",
335 			    "back", &node);
336 
337 			if (num < 1)
338 				cmn_err(CE_PANIC, "exec-unit node in MD"
339 				    " not attached to a cpu node");
340 
341 			for (j = 0; j < num; j++) {
342 				if (md_get_prop_val(mdp, node[j], "id",
343 				    &lcpuid))
344 					continue;
345 				if (lcpuid >= NCPU)
346 					continue;
347 				cpuid = (processorid_t)lcpuid;
348 				switch (etype) {
349 				case INTEGER:
350 					cpunodes[cpuid].exec_unit_mapping = idx;
351 					break;
352 				case FPU:
353 					cpunodes[cpuid].fpu_mapping = idx;
354 					break;
355 				}
356 			}
357 			md_free_scan_dag(mdp, &node);
358 		}
359 		md_free_scan_dag(mdp, &eunit);
360 	}
361 }
362 
363 /*
364  * Setup instruction cache coherency.  The "memory-coherent" property
365  * is optional.  Default for Icache_coherency is 1 (I$ is coherent).
366  * If we find an Icache with coherency == 0, then enable non-coherent
367  * Icache support.
368  */
369 void
370 setup_icache_coherency(md_t *mdp)
371 {
372 	int ncache;
373 	mde_cookie_t *cachelist;
374 	int i;
375 
376 	ncache = md_alloc_scan_dag(mdp, md_root_node(mdp), "cache",
377 	    "fwd", &cachelist);
378 
379 	/*
380 	 * The "cache" node is optional in MD, therefore ncaches can be 0.
381 	 */
382 	if (ncache < 1) {
383 		return;
384 	}
385 
386 	for (i = 0; i < ncache; i++) {
387 		uint64_t cache_level;
388 		uint64_t memory_coherent;
389 		uint8_t *type;
390 		int typelen;
391 
392 		if (md_get_prop_val(mdp, cachelist[i], "level",
393 		    &cache_level))
394 			continue;
395 
396 		if (cache_level != 1)
397 			continue;
398 
399 		if (md_get_prop_data(mdp, cachelist[i], "type",
400 		    &type, &typelen))
401 			continue;
402 
403 		if (strcmp((char *)type, "instn") != 0)
404 			continue;
405 
406 		if (md_get_prop_val(mdp, cachelist[i], "memory-coherent",
407 		    &memory_coherent))
408 			continue;
409 
410 		if (memory_coherent != 0)
411 			continue;
412 
413 		mach_setup_icache(memory_coherent);
414 		break;
415 	}
416 
417 	md_free_scan_dag(mdp, &cachelist);
418 }
419 
420 /*
421  * All the common setup of sun4v CPU modules is done by this routine.
422  */
423 void
424 cpu_setup_common(char **cpu_module_isa_set)
425 {
426 	extern int mmu_exported_pagesize_mask;
427 	int nocpus, i;
428 	size_t ra_limit;
429 	mde_cookie_t *cpulist;
430 	md_t *mdp;
431 
432 	if ((mdp = md_get_handle()) == NULL)
433 		cmn_err(CE_PANIC, "Unable to initialize machine description");
434 
435 	boot_ncpus = nocpus = md_alloc_scan_dag(mdp,
436 	    md_root_node(mdp), "cpu", "fwd", &cpulist);
437 	if (nocpus < 1) {
438 		cmn_err(CE_PANIC, "cpu_common_setup: cpulist allocation "
439 		    "failed or incorrect number of CPUs in MD");
440 	}
441 
442 	init_md_broken(mdp, cpulist);
443 
444 	if (use_page_coloring) {
445 		do_pg_coloring = 1;
446 	}
447 
448 	/*
449 	 * Get the valid mmu page sizes mask, Q sizes and isalist/r
450 	 * from the MD for the first available CPU in cpulist.
451 	 *
452 	 * Do not expect the MMU page sizes mask to be more than 32-bit.
453 	 */
454 	mmu_exported_pagesize_mask = (int)get_cpu_pagesizes(mdp, cpulist[0]);
455 
456 	/*
457 	 * Get the number of contexts and tsbs supported.
458 	 */
459 	if (get_mmu_shcontexts(mdp, cpulist[0]) >= MIN_NSHCONTEXTS &&
460 	    get_mmu_tsbs(mdp, cpulist[0]) >= MIN_NTSBS) {
461 		shctx_on = 1;
462 	}
463 
464 	/*
465 	 *  Get and check page search register properties.
466 	 */
467 	pgsz_search_on = check_mmu_pgsz_search(mdp, cpulist[0]);
468 
469 	for (i = 0; i < nocpus; i++)
470 		fill_cpu(mdp, cpulist[i]);
471 
472 	/* setup l2 cache count. */
473 	n_l2_caches = get_l2_cache_node_count(mdp);
474 
475 	setup_chip_mappings(mdp);
476 	setup_exec_unit_mappings(mdp);
477 	setup_icache_coherency(mdp);
478 
479 	/*
480 	 * If MD is broken then append the passed ISA set,
481 	 * otherwise trust the MD.
482 	 */
483 
484 	if (broken_md_flag)
485 		isa_list = construct_isalist(mdp, cpulist[0],
486 		    cpu_module_isa_set);
487 	else
488 		isa_list = construct_isalist(mdp, cpulist[0], NULL);
489 
490 	get_hwcaps(mdp, cpulist[0]);
491 	get_weakest_mem_model(mdp, cpulist[0]);
492 	get_q_sizes(mdp, cpulist[0]);
493 	get_va_bits(mdp, cpulist[0]);
494 
495 	/*
496 	 * ra_limit is the highest real address in the machine.
497 	 */
498 	ra_limit = get_ra_limit(mdp, cpulist[0]);
499 
500 	md_free_scan_dag(mdp, &cpulist);
501 
502 	(void) md_fini_handle(mdp);
503 
504 	/*
505 	 * Block stores invalidate all pages of the d$ so pagecopy
506 	 * et. al. do not need virtual translations with virtual
507 	 * coloring taken into consideration.
508 	 */
509 	pp_consistent_coloring = 0;
510 
511 	/*
512 	 * The kpm mapping window.
513 	 * kpm_size:
514 	 *	The size of a single kpm range.
515 	 *	The overall size will be: kpm_size * vac_colors.
516 	 * kpm_vbase:
517 	 *	The virtual start address of the kpm range within the kernel
518 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
519 	 */
520 
521 	/*
522 	 * Make kpm_vbase, kpm_size aligned to kpm_size_shift.
523 	 * To do this find the nearest power of 2 size that the
524 	 * actual ra_limit fits within.
525 	 * If it is an even power of two use that, otherwise use the
526 	 * next power of two larger than ra_limit.
527 	 */
528 
529 	ASSERT(ra_limit != 0);
530 
531 	kpm_size_shift = (ra_limit & (ra_limit - 1)) != 0 ?
532 	    highbit(ra_limit) : highbit(ra_limit) - 1;
533 
534 	/*
535 	 * No virtual caches on sun4v so size matches size shift
536 	 */
537 	kpm_size = 1ul << kpm_size_shift;
538 
539 	if (va_bits < VA_ADDRESS_SPACE_BITS) {
540 		/*
541 		 * In case of VA hole
542 		 * kpm_base = hole_end + 1TB
543 		 * Starting 1TB beyond where VA hole ends because on Niagara
544 		 * processor software must not use pages within 4GB of the
545 		 * VA hole as instruction pages to avoid problems with
546 		 * prefetching into the VA hole.
547 		 */
548 		kpm_vbase = (caddr_t)((0ull - (1ull << (va_bits - 1))) +
549 		    (1ull << 40));
550 	} else {		/* Number of VA bits 64 ... no VA hole */
551 		kpm_vbase = (caddr_t)0x8000000000000000ull;	/* 8 EB */
552 	}
553 
554 	/*
555 	 * The traptrace code uses either %tick or %stick for
556 	 * timestamping.  The sun4v require use of %stick.
557 	 */
558 	traptrace_use_stick = 1;
559 }
560 
561 /*
562  * Get the nctxs from MD. If absent panic.
563  */
564 static uint64_t
565 get_mmu_ctx_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
566 {
567 	uint64_t ctx_bits;
568 
569 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#context-bits",
570 	    &ctx_bits))
571 		ctx_bits = 0;
572 
573 	if (ctx_bits < MIN_NCTXS_BITS || ctx_bits > MAX_NCTXS_BITS)
574 		cmn_err(CE_PANIC, "Incorrect %ld number of contexts bits "
575 		    "returned by MD", ctx_bits);
576 
577 	return (ctx_bits);
578 }
579 
580 /*
581  * Get the number of tsbs from MD. If absent the default value is 0.
582  */
583 static uint64_t
584 get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie)
585 {
586 	uint64_t number_tsbs;
587 
588 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-#tsbs",
589 	    &number_tsbs))
590 		number_tsbs = 0;
591 
592 	return (number_tsbs);
593 }
594 
595 /*
596  * Get the number of shared contexts from MD. If absent the default value is 0.
597  *
598  */
599 static uint64_t
600 get_mmu_shcontexts(md_t *mdp, mde_cookie_t cpu_node_cookie)
601 {
602 	uint64_t number_contexts;
603 
604 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#shared-contexts",
605 	    &number_contexts))
606 		number_contexts = 0;
607 
608 	return (number_contexts);
609 }
610 
611 /*
612  * Initalize supported page sizes information.
613  * Set to 0, if the page sizes mask information is absent in MD.
614  */
615 static uint64_t
616 get_cpu_pagesizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
617 {
618 	uint64_t mmu_page_size_list;
619 
620 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-page-size-list",
621 	    &mmu_page_size_list))
622 		mmu_page_size_list = 0;
623 
624 	if (mmu_page_size_list == 0 || mmu_page_size_list > MAX_PAGESIZE_MASK)
625 		cmn_err(CE_PANIC, "Incorrect 0x%lx pagesize mask returned"
626 		    "by MD", mmu_page_size_list);
627 
628 	return (mmu_page_size_list);
629 }
630 
631 /*
632  * This routine gets the isalist information from MD and appends
633  * the CPU module ISA set if required.
634  */
635 static char *
636 construct_isalist(md_t *mdp, mde_cookie_t cpu_node_cookie,
637     char **cpu_module_isa_set)
638 {
639 	extern int at_flags;
640 	char *md_isalist;
641 	int md_isalen;
642 	char *isabuf;
643 	int isalen;
644 	char **isa_set;
645 	char *p, *q;
646 	int cpu_module_isalen = 0, found = 0;
647 
648 	(void) md_get_prop_data(mdp, cpu_node_cookie,
649 	    "isalist", (uint8_t **)&isabuf, &isalen);
650 
651 	/*
652 	 * We support binaries for all the cpus that have shipped so far.
653 	 * The kernel emulates instructions that are not supported by hardware.
654 	 */
655 	at_flags = EF_SPARC_SUN_US3 | EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
656 
657 	/*
658 	 * Construct the space separated isa_list.
659 	 */
660 	if (cpu_module_isa_set != NULL) {
661 		for (isa_set = cpu_module_isa_set; *isa_set != NULL;
662 		    isa_set++) {
663 			cpu_module_isalen += strlen(*isa_set);
664 			cpu_module_isalen++;	/* for space character */
665 		}
666 	}
667 
668 	/*
669 	 * Allocate the buffer of MD isa buffer length + CPU module
670 	 * isa buffer length.
671 	 */
672 	md_isalen = isalen + cpu_module_isalen + 2;
673 	md_isalist = (char *)prom_alloc((caddr_t)0, md_isalen, 0);
674 	if (md_isalist == NULL)
675 		cmn_err(CE_PANIC, "construct_isalist: Allocation failed for "
676 		    "md_isalist");
677 
678 	md_isalist[0] = '\0'; /* create an empty string to start */
679 	for (p = isabuf, q = p + isalen; p < q; p += strlen(p) + 1) {
680 		(void) strlcat(md_isalist, p, md_isalen);
681 		(void) strcat(md_isalist, " ");
682 	}
683 
684 	/*
685 	 * Check if the isa_set is present in isalist returned by MD.
686 	 * If yes, then no need to append it, if no then append it to
687 	 * isalist returned by MD.
688 	 */
689 	if (cpu_module_isa_set != NULL) {
690 		for (isa_set = cpu_module_isa_set; *isa_set != NULL;
691 		    isa_set++) {
692 			found = 0;
693 			for (p = isabuf, q = p + isalen; p < q;
694 			    p += strlen(p) + 1) {
695 				if (strcmp(p, *isa_set) == 0) {
696 					found = 1;
697 					break;
698 				}
699 			}
700 			if (!found) {
701 				(void) strlcat(md_isalist, *isa_set, md_isalen);
702 				(void) strcat(md_isalist, " ");
703 			}
704 		}
705 	}
706 
707 	/* Get rid of any trailing white spaces */
708 	md_isalist[strlen(md_isalist) - 1] = '\0';
709 
710 	return (md_isalist);
711 }
712 
713 static void
714 get_hwcaps(md_t *mdp, mde_cookie_t cpu_node_cookie)
715 {
716 	char *hwcapbuf;
717 	int hwcaplen;
718 
719 	if (md_get_prop_data(mdp, cpu_node_cookie,
720 	    "hwcap-list", (uint8_t **)&hwcapbuf, &hwcaplen)) {
721 		/* Property not found */
722 		return;
723 	}
724 
725 	cpu_hwcap_flags |= names2bits(hwcapbuf, hwcaplen, FMT_AV_SPARC,
726 	    "unrecognized token: %s");
727 }
728 
729 static void
730 get_weakest_mem_model(md_t *mdp, mde_cookie_t cpu_node_cookie)
731 {
732 	char *mmbuf;
733 	int mmlen;
734 	uint_t wmm;
735 	char *p, *q;
736 
737 	if (md_get_prop_data(mdp, cpu_node_cookie,
738 	    "memory-model-list", (uint8_t **)&mmbuf, &mmlen)) {
739 		/* Property not found */
740 		return;
741 	}
742 
743 	wmm = TSTATE_MM_TSO;
744 	for (p = mmbuf, q = p + mmlen; p < q; p += strlen(p) + 1) {
745 		if (strcmp(p, "wc") == 0)
746 			wmm = TSTATE_MM_WC;
747 	}
748 	weakest_mem_model = wmm;
749 }
750 
751 /*
752  * Does the opposite of cmn_err(9f) "%b" conversion specification:
753  * Given a list of strings, converts them to a bit-vector.
754  *
755  *  tokens - is a buffer of [NUL-terminated] strings.
756  *  tokenslen - length of tokenbuf in bytes.
757  *  bit_formatter - is a %b format string, such as FMT_AV_SPARC
758  *    from /usr/include/sys/auxv_SPARC.h, of the form:
759  *    <base-char>[<bit-char><token-string>]...
760  *        <base-char> is ignored.
761  *        <bit-char>  is [1-32], as per cmn_err(9f).
762  *  warning - is a printf-style format string containing "%s",
763  *    which is used to print a warning message when an unrecognized
764  *    token is found.  If warning is NULL, no warning is printed.
765  * Returns a bit-vector corresponding to the specified tokens.
766  */
767 
768 static unsigned long
769 names2bits(char *tokens, size_t tokenslen, char *bit_formatter, char *warning)
770 {
771 	char *cur;
772 	size_t  curlen;
773 	unsigned long ul = 0;
774 	char *hit;
775 	char *bs;
776 
777 	bit_formatter++;	/* skip base; not needed for input */
778 	cur = tokens;
779 	while (tokenslen) {
780 		curlen = strlen(cur);
781 		bs = bit_formatter;
782 		/*
783 		 * We need a complicated while loop and the >=32 check,
784 		 * instead of a simple "if (strstr())" so that when the
785 		 * token is "vis", we don't match on "vis2" (for example).
786 		 */
787 		/* LINTED E_EQUALITY_NOT_ASSIGNMENT */
788 		while ((hit = strstr(bs, cur)) &&
789 		    *(hit + curlen) >= 32) {
790 			/*
791 			 * We're still in the middle of a word, i.e., not
792 			 * pointing at a <bit-char>.  So advance ptr
793 			 * to ensure forward progress.
794 			 */
795 			bs = hit + curlen + 1;
796 		}
797 
798 		if (hit != NULL) {
799 			ul |= (1<<(*(hit-1) - 1));
800 		} else {
801 			/* The token wasn't found in bit_formatter */
802 			if (warning != NULL)
803 				cmn_err(CE_WARN, warning, cur);
804 		}
805 		tokenslen -= curlen + 1;
806 		cur += curlen + 1;
807 	}
808 	return (ul);
809 }
810 
811 uint64_t
812 get_ra_limit(md_t *mdp, mde_cookie_t cpu_node_cookie)
813 {
814 	extern int ppvm_enable;
815 	extern int meta_alloc_enable;
816 	mde_cookie_t *mem_list;
817 	mde_cookie_t *mblock_list;
818 	int i;
819 	int memnodes;
820 	int nmblock;
821 	uint64_t r;
822 	uint64_t base;
823 	uint64_t size;
824 	uint64_t ra_limit = 0, new_limit = 0;
825 
826 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#ra-bits", &r) == 0) {
827 		if (r == 0 || r > RA_ADDRESS_SPACE_BITS)
828 			cmn_err(CE_PANIC, "Incorrect number of ra bits in MD");
829 		else {
830 			/*
831 			 * Enable memory DR and metadata (page_t)
832 			 * allocation from existing memory.
833 			 */
834 			ppvm_enable = 1;
835 			meta_alloc_enable = 1;
836 			return (1ULL << r);
837 		}
838 	}
839 
840 	cmn_err(CE_WARN, "mmu-#ra-bits property not found in MD");
841 	cmn_err(CE_WARN, "Memory DR disabled");
842 
843 	memnodes = md_alloc_scan_dag(mdp,
844 	    md_root_node(mdp), "memory", "fwd", &mem_list);
845 
846 	ASSERT(memnodes == 1);
847 
848 	nmblock = md_alloc_scan_dag(mdp,
849 	    mem_list[0], "mblock", "fwd", &mblock_list);
850 	if (nmblock < 1)
851 		cmn_err(CE_PANIC, "cannot find mblock nodes in MD");
852 
853 	for (i = 0; i < nmblock; i++) {
854 		if (md_get_prop_val(mdp, mblock_list[i], "base", &base))
855 			cmn_err(CE_PANIC, "base property missing from MD"
856 			    " mblock node");
857 		if (md_get_prop_val(mdp, mblock_list[i], "size", &size))
858 			cmn_err(CE_PANIC, "size property missing from MD"
859 			    " mblock node");
860 
861 		ASSERT(size != 0);
862 
863 		new_limit = base + size;
864 
865 		if (base > new_limit)
866 			cmn_err(CE_PANIC, "mblock in MD wrapped around");
867 
868 		if (new_limit > ra_limit)
869 			ra_limit = new_limit;
870 	}
871 
872 	ASSERT(ra_limit != 0);
873 
874 	if (ra_limit > MAX_REAL_ADDRESS) {
875 		cmn_err(CE_WARN, "Highest real address in MD too large"
876 		    " clipping to %llx\n", MAX_REAL_ADDRESS);
877 		ra_limit = MAX_REAL_ADDRESS;
878 	}
879 
880 	md_free_scan_dag(mdp, &mblock_list);
881 
882 	md_free_scan_dag(mdp, &mem_list);
883 
884 	return (ra_limit);
885 }
886 
887 /*
888  * This routine sets the globals for CPU and DEV mondo queue entries and
889  * resumable and non-resumable error queue entries.
890  *
891  * First, look up the number of bits available to pass an entry number.
892  * This can vary by platform and may result in allocating an unreasonably
893  * (or impossibly) large amount of memory for the corresponding table,
894  * so we clamp it by 'max_entries'.  Finally, since the q size is used when
895  * calling contig_mem_alloc(), which expects a power of 2, clamp the q size
896  * down to a power of 2.  If the prop is missing, use 'default_entries'.
897  */
898 static uint64_t
899 get_single_q_size(md_t *mdp, mde_cookie_t cpu_node_cookie,
900     char *qnamep, uint64_t default_entries, uint64_t max_entries)
901 {
902 	uint64_t entries;
903 
904 	if (default_entries > max_entries)
905 		cmn_err(CE_CONT, "!get_single_q_size: dflt %ld > "
906 		    "max %ld for %s\n", default_entries, max_entries, qnamep);
907 
908 	if (md_get_prop_val(mdp, cpu_node_cookie, qnamep, &entries)) {
909 		if (!broken_md_flag)
910 			cmn_err(CE_PANIC, "Missing %s property in MD cpu node",
911 			    qnamep);
912 		entries = default_entries;
913 	} else {
914 		entries = 1 << entries;
915 	}
916 
917 	entries = MIN(entries, max_entries);
918 	/* If not a power of 2, truncate to a power of 2. */
919 	if ((entries & (entries - 1)) != 0) {
920 		entries = 1 << (highbit(entries) - 1);
921 	}
922 
923 	return (entries);
924 }
925 
926 /* Scaling constant used to compute size of cpu mondo queue */
927 #define	CPU_MONDO_Q_MULTIPLIER	8
928 
929 static void
930 get_q_sizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
931 {
932 	uint64_t max_qsize;
933 	mde_cookie_t *platlist;
934 	int nrnode;
935 
936 	/*
937 	 * Compute the maximum number of entries for the cpu mondo queue.
938 	 * Use the appropriate property in the platform node, if it is
939 	 * available.  Else, base it on NCPU.
940 	 */
941 	nrnode = md_alloc_scan_dag(mdp,
942 	    md_root_node(mdp), "platform", "fwd", &platlist);
943 
944 	ASSERT(nrnode == 1);
945 
946 	ncpu_guest_max = NCPU;
947 	(void) md_get_prop_val(mdp, platlist[0], "max-cpus", &ncpu_guest_max);
948 	max_qsize = ncpu_guest_max * CPU_MONDO_Q_MULTIPLIER;
949 
950 	md_free_scan_dag(mdp, &platlist);
951 
952 	cpu_q_entries = get_single_q_size(mdp, cpu_node_cookie,
953 	    "q-cpu-mondo-#bits", DEFAULT_CPU_Q_ENTRIES, max_qsize);
954 
955 	dev_q_entries = get_single_q_size(mdp, cpu_node_cookie,
956 	    "q-dev-mondo-#bits", DEFAULT_DEV_Q_ENTRIES, MAXIVNUM);
957 
958 	cpu_rq_entries = get_single_q_size(mdp, cpu_node_cookie,
959 	    "q-resumable-#bits", CPU_RQ_ENTRIES, MAX_CPU_RQ_ENTRIES);
960 
961 	cpu_nrq_entries = get_single_q_size(mdp, cpu_node_cookie,
962 	    "q-nonresumable-#bits", CPU_NRQ_ENTRIES, MAX_CPU_NRQ_ENTRIES);
963 }
964 
965 
966 static void
967 get_va_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
968 {
969 	uint64_t value = VA_ADDRESS_SPACE_BITS;
970 
971 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#va-bits", &value))
972 		cmn_err(CE_PANIC, "mmu-#va-bits property  not found in MD");
973 
974 
975 	if (value == 0 || value > VA_ADDRESS_SPACE_BITS)
976 		cmn_err(CE_PANIC, "Incorrect number of va bits in MD");
977 
978 	/* Do not expect number of VA bits to be more than 32-bit quantity */
979 
980 	va_bits = (int)value;
981 
982 	/*
983 	 * Correct the value for VA bits on UltraSPARC-T1 based systems
984 	 * in case of broken MD.
985 	 */
986 	if (broken_md_flag)
987 		va_bits = DEFAULT_VA_ADDRESS_SPACE_BITS;
988 }
989 
/*
 * Return the cached number of level-2 cache nodes (n_l2_caches).
 */
int
l2_cache_node_count(void)
{
	return (n_l2_caches);
}
995 
996 /*
997  * count the number of l2 caches.
998  */
999 int
1000 get_l2_cache_node_count(md_t *mdp)
1001 {
1002 	int i;
1003 	mde_cookie_t *cachenodes;
1004 	uint64_t level;
1005 	int n_cachenodes = md_alloc_scan_dag(mdp, md_root_node(mdp),
1006 	    "cache", "fwd", &cachenodes);
1007 	int l2_caches = 0;
1008 
1009 	for (i = 0; i < n_cachenodes; i++) {
1010 		if (md_get_prop_val(mdp, cachenodes[i], "level", &level) != 0) {
1011 			level = 0;
1012 		}
1013 		if (level == 2) {
1014 			l2_caches++;
1015 		}
1016 	}
1017 	md_free_scan_dag(mdp, &cachenodes);
1018 	return (l2_caches);
1019 }
1020 
1021 /*
1022  * This routine returns the L2 cache information such as -- associativity,
1023  * size and linesize.
1024  */
1025 static int
1026 get_l2_cache_info(md_t *mdp, mde_cookie_t cpu_node_cookie,
1027 	    uint64_t *associativity, uint64_t *size, uint64_t *linesize)
1028 {
1029 	mde_cookie_t *cachelist;
1030 	int ncaches, i;
1031 	uint64_t cache_level = 0;
1032 
1033 	ncaches = md_alloc_scan_dag(mdp, cpu_node_cookie, "cache",
1034 	    "fwd", &cachelist);
1035 	/*
1036 	 * The "cache" node is optional in MD, therefore ncaches can be 0.
1037 	 */
1038 	if (ncaches < 1) {
1039 		return (0);
1040 	}
1041 
1042 	for (i = 0; i < ncaches; i++) {
1043 		uint64_t local_assoc;
1044 		uint64_t local_size;
1045 		uint64_t local_lsize;
1046 
1047 		if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
1048 			continue;
1049 
1050 		if (cache_level != 2) continue;
1051 
1052 		/* If properties are missing from this cache ignore it */
1053 
1054 		if ((md_get_prop_val(mdp, cachelist[i],
1055 		    "associativity", &local_assoc))) {
1056 			continue;
1057 		}
1058 
1059 		if ((md_get_prop_val(mdp, cachelist[i],
1060 		    "size", &local_size))) {
1061 			continue;
1062 		}
1063 
1064 		if ((md_get_prop_val(mdp, cachelist[i],
1065 		    "line-size", &local_lsize))) {
1066 			continue;
1067 		}
1068 
1069 		*associativity = local_assoc;
1070 		*size = local_size;
1071 		*linesize = local_lsize;
1072 		break;
1073 	}
1074 
1075 	md_free_scan_dag(mdp, &cachelist);
1076 
1077 	return ((cache_level == 2) ? 1 : 0);
1078 }
1079 
1080 
1081 /*
1082  * Set the broken_md_flag to 1 if the MD doesn't have
1083  * the domaining-enabled property in the platform node and the
1084  * platform uses the UltraSPARC-T1 cpu. This flag is used to
1085  * workaround some of the incorrect MD properties.
1086  */
1087 static void
1088 init_md_broken(md_t *mdp, mde_cookie_t *cpulist)
1089 {
1090 	int nrnode;
1091 	mde_cookie_t *platlist, rootnode;
1092 	uint64_t val = 0;
1093 	char *namebuf;
1094 	int namelen;
1095 
1096 	rootnode = md_root_node(mdp);
1097 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1098 	ASSERT(cpulist);
1099 
1100 	nrnode = md_alloc_scan_dag(mdp, rootnode, "platform", "fwd",
1101 	    &platlist);
1102 
1103 	if (nrnode < 1)
1104 		cmn_err(CE_PANIC, "init_md_broken: platform node missing");
1105 
1106 	if (md_get_prop_data(mdp, cpulist[0],
1107 	    "compatible", (uint8_t **)&namebuf, &namelen)) {
1108 		cmn_err(CE_PANIC, "init_md_broken: "
1109 		    "Cannot read 'compatible' property of 'cpu' node");
1110 	}
1111 
1112 	if (md_get_prop_val(mdp, platlist[0],
1113 	    "domaining-enabled", &val) == -1 &&
1114 	    strcmp(namebuf, "SUNW,UltraSPARC-T1") == 0)
1115 		broken_md_flag = 1;
1116 
1117 	md_free_scan_dag(mdp, &platlist);
1118 }
1119 
1120 /*
1121  * This routine gets the MD properties associated with the TLB search order API
1122  * and compares these against the expected values for a processor which supports
1123  * this API. The return value is used to determine whether use the API.
1124  */
1125 static int
1126 check_mmu_pgsz_search(md_t *mdp, mde_cookie_t cpu_node_cookie)
1127 {
1128 
1129 	uint64_t mmu_search_nshared_contexts;
1130 	uint64_t mmu_max_search_order;
1131 	uint64_t mmu_non_priv_search_unified;
1132 	uint64_t mmu_search_page_size_list;
1133 
1134 	if (md_get_prop_val(mdp, cpu_node_cookie,
1135 	    "mmu-search-#shared-contexts", &mmu_search_nshared_contexts))
1136 		mmu_search_nshared_contexts = 0;
1137 
1138 	if (mmu_search_nshared_contexts == 0 ||
1139 	    mmu_search_nshared_contexts != NSEARCH_SHCONTEXTS)
1140 		return (0);
1141 
1142 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-search-order",
1143 	    &mmu_max_search_order))
1144 		mmu_max_search_order = 0;
1145 
1146 	if (mmu_max_search_order == 0 || mmu_max_search_order !=
1147 	    MAX_PGSZ_SEARCH_ORDER)
1148 		return (0);
1149 
1150 	if (md_get_prop_val(mdp, cpu_node_cookie,
1151 	    "mmu-non-priv-search-unified", &mmu_non_priv_search_unified))
1152 		mmu_non_priv_search_unified = -1;
1153 
1154 	if (mmu_non_priv_search_unified != 1) {
1155 		return (0);
1156 	}
1157 
1158 	if (md_get_prop_val(mdp, cpu_node_cookie,
1159 	    "mmu-search-page-size-list", &mmu_search_page_size_list)) {
1160 		mmu_search_page_size_list = 0;
1161 		return (0);
1162 	}
1163 
1164 	return (1);
1165 }
1166