xref: /titanic_52/usr/src/uts/sun4v/os/fillsysinfo.c (revision 209e49b2ff611e7d61ff58e13756ae67f51be550)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/errno.h>
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/cpu.h>
29 #include <sys/cpuvar.h>
30 #include <sys/clock.h>
31 #include <sys/promif.h>
32 #include <sys/promimpl.h>
33 #include <sys/systm.h>
34 #include <sys/machsystm.h>
35 #include <sys/debug.h>
36 #include <sys/sunddi.h>
37 #include <sys/modctl.h>
38 #include <sys/cpu_module.h>
39 #include <sys/kobj.h>
40 #include <sys/cmp.h>
41 #include <sys/async.h>
42 #include <vm/page.h>
43 #include <vm/hat_sfmmu.h>
44 #include <sys/sysmacros.h>
45 #include <sys/mach_descrip.h>
46 #include <sys/mdesc.h>
47 #include <sys/archsystm.h>
48 #include <sys/error.h>
49 #include <sys/mmu.h>
50 #include <sys/bitmap.h>
51 #include <sys/intreg.h>
52 #include <sys/instance.h>
53 
/*
 * Per-CPU property records, indexed by cpuid.  Entries are populated by
 * fill_cpu() and zeroed by empty_cpu().
 */
struct cpu_node cpunodes[NCPU];

/*
 * Mondo/error queue sizes (in entries) and the guest CPU limit.  All five
 * are assigned in get_q_sizes().
 */
uint64_t cpu_q_entries;
uint64_t dev_q_entries;
uint64_t cpu_rq_entries;
uint64_t cpu_nrq_entries;
uint64_t ncpu_guest_max;

void fill_cpu(md_t *, mde_cookie_t);

/* Forward declarations for the machine description (MD) helpers below. */
static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t);
static uint64_t get_mmu_tsbs(md_t *, mde_cookie_t);
static uint64_t	get_mmu_shcontexts(md_t *, mde_cookie_t);
static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t);
static char *construct_isalist(md_t *, mde_cookie_t, char **);
static void init_md_broken(md_t *, mde_cookie_t *);
static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *,
    uint64_t *);
static void get_hwcaps(md_t *, mde_cookie_t);
static void get_weakest_mem_model(md_t *, mde_cookie_t);
static void get_q_sizes(md_t *, mde_cookie_t);
static void get_va_bits(md_t *, mde_cookie_t);
static size_t get_ra_limit(md_t *, mde_cookie_t);
static int get_l2_cache_node_count(md_t *);
static unsigned long names2bits(char *tokens, size_t tokenslen,
    char *bit_formatter, char *warning);

uint64_t	system_clock_freq;
uint_t		niommu_tsbs = 0;

/*
 * Number of level-2 "cache" nodes in the MD.  Set in cpu_setup_common()
 * and reported by l2_cache_node_count().
 */
static int n_l2_caches = 0;

/* prevent compilation with VAC defined */
#ifdef VAC
#error "The sun4v architecture does not support VAC"
#endif

/*
 * With no VAC, the vac_* variables below are expressed in terms of a
 * single MMU page.
 */
#define	S_VAC_SIZE	MMU_PAGESIZE
#define	S_VAC_SHIFT	MMU_PAGESHIFT

int		vac_size = S_VAC_SIZE;
uint_t		vac_mask = MMU_PAGEMASK & (S_VAC_SIZE - 1);
int		vac_shift = S_VAC_SHIFT;
uintptr_t	shm_alignment = S_VAC_SIZE;
98 
/*
 * No-op in this file: the body contains no statements.
 */
void
map_wellknown_devices()
{
}
103 
104 void
105 fill_cpu(md_t *mdp, mde_cookie_t cpuc)
106 {
107 	struct cpu_node *cpunode;
108 	uint64_t cpuid;
109 	uint64_t clk_freq;
110 	char *namebuf;
111 	char *namebufp;
112 	int namelen;
113 	uint64_t associativity = 0, linesize = 0, size = 0;
114 
115 	if (md_get_prop_val(mdp, cpuc, "id", &cpuid)) {
116 		return;
117 	}
118 
119 	/* All out-of-range cpus will be stopped later. */
120 	if (cpuid >= NCPU) {
121 		cmn_err(CE_CONT, "fill_cpu: out of range cpuid %ld - "
122 		    "cpu excluded from configuration\n", cpuid);
123 
124 		return;
125 	}
126 
127 	cpunode = &cpunodes[cpuid];
128 	cpunode->cpuid = (int)cpuid;
129 	cpunode->device_id = cpuid;
130 
131 	if (sizeof (cpunode->fru_fmri) > strlen(CPU_FRU_FMRI))
132 		(void) strcpy(cpunode->fru_fmri, CPU_FRU_FMRI);
133 
134 	if (md_get_prop_data(mdp, cpuc,
135 	    "compatible", (uint8_t **)&namebuf, &namelen)) {
136 		cmn_err(CE_PANIC, "fill_cpu: Cannot read compatible "
137 		    "property");
138 	}
139 	namebufp = namebuf;
140 	if (strncmp(namebufp, "SUNW,", 5) == 0)
141 		namebufp += 5;
142 	if (strlen(namebufp) > sizeof (cpunode->name))
143 		cmn_err(CE_PANIC, "Compatible property too big to "
144 		    "fit into the cpunode name buffer");
145 	(void) strcpy(cpunode->name, namebufp);
146 
147 	if (md_get_prop_val(mdp, cpuc,
148 	    "clock-frequency", &clk_freq)) {
149 			clk_freq = 0;
150 	}
151 	cpunode->clock_freq = clk_freq;
152 
153 	ASSERT(cpunode->clock_freq != 0);
154 	/*
155 	 * Compute scaling factor based on rate of %tick. This is used
156 	 * to convert from ticks derived from %tick to nanoseconds. See
157 	 * comment in sun4u/sys/clock.h for details.
158 	 */
159 	cpunode->tick_nsec_scale = (uint_t)(((uint64_t)NANOSEC <<
160 	    (32 - TICK_NSEC_SHIFT)) / cpunode->clock_freq);
161 
162 	/*
163 	 * The nodeid is not used in sun4v at all. Setting it
164 	 * to positive value to make starting of slave CPUs
165 	 * code happy.
166 	 */
167 	cpunode->nodeid = cpuid + 1;
168 
169 	/*
170 	 * Obtain the L2 cache information from MD.
171 	 * If "Cache" node exists, then set L2 cache properties
172 	 * as read from MD.
173 	 * If node does not exists, then set the L2 cache properties
174 	 * in individual CPU module.
175 	 */
176 	if ((!get_l2_cache_info(mdp, cpuc,
177 	    &associativity, &size, &linesize)) ||
178 	    associativity == 0 || size == 0 || linesize == 0) {
179 		cpu_fiximp(cpunode);
180 	} else {
181 		/*
182 		 * Do not expect L2 cache properties to be bigger
183 		 * than 32-bit quantity.
184 		 */
185 		cpunode->ecache_associativity = (int)associativity;
186 		cpunode->ecache_size = (int)size;
187 		cpunode->ecache_linesize = (int)linesize;
188 	}
189 
190 	cpunode->ecache_setsize =
191 	    cpunode->ecache_size / cpunode->ecache_associativity;
192 
193 	/*
194 	 * Initialize the mapping for exec unit, chip and core.
195 	 */
196 	cpunode->exec_unit_mapping = NO_EU_MAPPING_FOUND;
197 	cpunode->l2_cache_mapping = NO_MAPPING_FOUND;
198 	cpunode->core_mapping = NO_CORE_MAPPING_FOUND;
199 
200 	if (ecache_setsize == 0)
201 		ecache_setsize = cpunode->ecache_setsize;
202 	if (ecache_alignsize == 0)
203 		ecache_alignsize = cpunode->ecache_linesize;
204 
205 }
206 
207 void
208 empty_cpu(int cpuid)
209 {
210 	bzero(&cpunodes[cpuid], sizeof (struct cpu_node));
211 }
212 
213 /*
214  * Use L2 cache node to derive the chip mapping.
215  */
216 void
217 setup_chip_mappings(md_t *mdp)
218 {
219 	int ncache, ncpu;
220 	mde_cookie_t *node, *cachelist;
221 	int i, j;
222 	processorid_t cpuid;
223 	int idx = 0;
224 
225 	ncache = md_alloc_scan_dag(mdp, md_root_node(mdp), "cache",
226 	    "fwd", &cachelist);
227 
228 	/*
229 	 * The "cache" node is optional in MD, therefore ncaches can be 0.
230 	 */
231 	if (ncache < 1) {
232 		return;
233 	}
234 
235 	for (i = 0; i < ncache; i++) {
236 		uint64_t cache_level;
237 		uint64_t lcpuid;
238 
239 		if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
240 			continue;
241 
242 		if (cache_level != 2)
243 			continue;
244 
245 		/*
246 		 * Found a l2 cache node. Find out the cpu nodes it
247 		 * points to.
248 		 */
249 		ncpu = md_alloc_scan_dag(mdp, cachelist[i], "cpu",
250 		    "back", &node);
251 
252 		if (ncpu < 1)
253 			continue;
254 
255 		for (j = 0; j < ncpu; j++) {
256 			if (md_get_prop_val(mdp, node[j], "id", &lcpuid))
257 				continue;
258 			if (lcpuid >= NCPU)
259 				continue;
260 			cpuid = (processorid_t)lcpuid;
261 			cpunodes[cpuid].l2_cache_mapping = idx;
262 		}
263 		md_free_scan_dag(mdp, &node);
264 
265 		idx++;
266 	}
267 
268 	md_free_scan_dag(mdp, &cachelist);
269 }
270 
271 void
272 setup_exec_unit_mappings(md_t *mdp)
273 {
274 	int num, num_eunits;
275 	mde_cookie_t cpus_node;
276 	mde_cookie_t *node, *eunit;
277 	int idx, i, j;
278 	processorid_t cpuid;
279 	char *eunit_name = broken_md_flag ? "exec_unit" : "exec-unit";
280 	enum eu_type { INTEGER, FPU } etype;
281 
282 	/*
283 	 * Find the cpu integer exec units - and
284 	 * setup the mappings appropriately.
285 	 */
286 	num = md_alloc_scan_dag(mdp, md_root_node(mdp), "cpus", "fwd", &node);
287 	if (num < 1)
288 		cmn_err(CE_PANIC, "No cpus node in machine description");
289 	if (num > 1)
290 		cmn_err(CE_PANIC, "More than 1 cpus node in machine"
291 		    " description");
292 
293 	cpus_node = node[0];
294 	md_free_scan_dag(mdp, &node);
295 
296 	num_eunits = md_alloc_scan_dag(mdp, cpus_node, eunit_name,
297 	    "fwd", &eunit);
298 	if (num_eunits > 0) {
299 		char *int_str = broken_md_flag ? "int" : "integer";
300 		char *fpu_str = "fp";
301 
302 		/* Spin through and find all the integer exec units */
303 		for (i = 0; i < num_eunits; i++) {
304 			char *p;
305 			char *val;
306 			int vallen;
307 			uint64_t lcpuid;
308 
309 			/* ignore nodes with no type */
310 			if (md_get_prop_data(mdp, eunit[i], "type",
311 			    (uint8_t **)&val, &vallen))
312 				continue;
313 
314 			for (p = val; *p != '\0'; p += strlen(p) + 1) {
315 				if (strcmp(p, int_str) == 0) {
316 					etype = INTEGER;
317 					goto found;
318 				}
319 				if (strcmp(p, fpu_str) == 0) {
320 					etype = FPU;
321 					goto found;
322 				}
323 			}
324 
325 			continue;
326 found:
327 			idx = NCPU + i;
328 			/*
329 			 * find the cpus attached to this EU and
330 			 * update their mapping indices
331 			 */
332 			num = md_alloc_scan_dag(mdp, eunit[i], "cpu",
333 			    "back", &node);
334 
335 			if (num < 1)
336 				cmn_err(CE_PANIC, "exec-unit node in MD"
337 				    " not attached to a cpu node");
338 
339 			for (j = 0; j < num; j++) {
340 				if (md_get_prop_val(mdp, node[j], "id",
341 				    &lcpuid))
342 					continue;
343 				if (lcpuid >= NCPU)
344 					continue;
345 				cpuid = (processorid_t)lcpuid;
346 				switch (etype) {
347 				case INTEGER:
348 					cpunodes[cpuid].exec_unit_mapping = idx;
349 					break;
350 				case FPU:
351 					cpunodes[cpuid].fpu_mapping = idx;
352 					break;
353 				}
354 			}
355 			md_free_scan_dag(mdp, &node);
356 		}
357 
358 
359 		md_free_scan_dag(mdp, &eunit);
360 	}
361 }
362 
/*
 * All the common setup of sun4v CPU modules is done by this routine.
 *
 *  cpu_module_isa_set - NULL-terminated array of ISA name strings
 *	supplied by the CPU module.  It is only passed on to
 *	construct_isalist() when broken_md_flag is set.
 *
 * Reads the machine description (MD) to fill in cpunodes[], the cache
 * and exec-unit mappings, the isa list, hardware capabilities, memory
 * model, queue sizes and VA bits, then derives the kpm window
 * (kpm_size_shift, kpm_size, kpm_vbase) from the real address limit.
 * Panics if no MD handle or no cpu nodes can be obtained.
 */
void
cpu_setup_common(char **cpu_module_isa_set)
{
	extern int mmu_exported_pagesize_mask;
	int nocpus, i;
	size_t ra_limit;
	mde_cookie_t *cpulist;
	md_t *mdp;

	if ((mdp = md_get_handle()) == NULL)
		cmn_err(CE_PANIC, "Unable to initialize machine description");

	/* The number of cpu nodes found in the MD is also boot_ncpus. */
	boot_ncpus = nocpus = md_alloc_scan_dag(mdp,
	    md_root_node(mdp), "cpu", "fwd", &cpulist);
	if (nocpus < 1) {
		cmn_err(CE_PANIC, "cpu_common_setup: cpulist allocation "
		    "failed or incorrect number of CPUs in MD");
	}

	/* Sets broken_md_flag, which several of the helpers below consult. */
	init_md_broken(mdp, cpulist);

	if (use_page_coloring) {
		do_pg_coloring = 1;
	}

	/*
	 * Get the valid mmu page sizes mask, Q sizes and isalist
	 * from the MD for the first available CPU in cpulist.
	 *
	 * Do not expect the MMU page sizes mask to be more than 32-bit.
	 */
	mmu_exported_pagesize_mask = (int)get_cpu_pagesizes(mdp, cpulist[0]);

	/*
	 * Get the number of contexts and tsbs supported.
	 * Shared contexts are enabled only when both minimums are met.
	 */
	if (get_mmu_shcontexts(mdp, cpulist[0]) >= MIN_NSHCONTEXTS &&
	    get_mmu_tsbs(mdp, cpulist[0]) >= MIN_NTSBS) {
		shctx_on = 1;
	}

	for (i = 0; i < nocpus; i++)
		fill_cpu(mdp, cpulist[i]);

	/* setup l2 cache count. */
	n_l2_caches = get_l2_cache_node_count(mdp);

	setup_chip_mappings(mdp);
	setup_exec_unit_mappings(mdp);

	/*
	 * If MD is broken then append the passed ISA set,
	 * otherwise trust the MD.
	 */

	if (broken_md_flag)
		isa_list = construct_isalist(mdp, cpulist[0],
		    cpu_module_isa_set);
	else
		isa_list = construct_isalist(mdp, cpulist[0], NULL);

	get_hwcaps(mdp, cpulist[0]);
	get_weakest_mem_model(mdp, cpulist[0]);
	get_q_sizes(mdp, cpulist[0]);
	get_va_bits(mdp, cpulist[0]);

	/*
	 * ra_limit is the highest real address in the machine.
	 */
	ra_limit = get_ra_limit(mdp, cpulist[0]);

	/* Nothing below reads the MD, so release the list and the handle. */
	md_free_scan_dag(mdp, &cpulist);

	(void) md_fini_handle(mdp);

	/*
	 * Block stores invalidate all pages of the d$ so pagecopy
	 * et. al. do not need virtual translations with virtual
	 * coloring taken into consideration.
	 */
	pp_consistent_coloring = 0;

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	The size of a single kpm range.
	 *	The overall size will be: kpm_size * vac_colors.
	 * kpm_vbase:
	 *	The virtual start address of the kpm range within the kernel
	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
	 */

	/*
	 * Make kpm_vbase, kpm_size aligned to kpm_size_shift.
	 * To do this find the nearest power of 2 size that the
	 * actual ra_limit fits within.
	 * If it is an even power of two use that, otherwise use the
	 * next power of two larger than ra_limit.
	 */

	ASSERT(ra_limit != 0);

	kpm_size_shift = (ra_limit & (ra_limit - 1)) != 0 ?
	    highbit(ra_limit) : highbit(ra_limit) - 1;

	/*
	 * No virtual caches on sun4v so size matches size shift
	 */
	kpm_size = 1ul << kpm_size_shift;

	if (va_bits < VA_ADDRESS_SPACE_BITS) {
		/*
		 * In case of VA hole
		 * kpm_base = hole_end + 1TB
		 * Starting 1TB beyond where VA hole ends because on Niagara
		 * processor software must not use pages within 4GB of the
		 * VA hole as instruction pages to avoid problems with
		 * prefetching into the VA hole.
		 */
		kpm_vbase = (caddr_t)((0ull - (1ull << (va_bits - 1))) +
		    (1ull << 40));
	} else {		/* Number of VA bits 64 ... no VA hole */
		kpm_vbase = (caddr_t)0x8000000000000000ull;	/* 8 EB */
	}

	/*
	 * The traptrace code uses either %tick or %stick for
	 * timestamping.  The sun4v require use of %stick.
	 */
	traptrace_use_stick = 1;
}
497 
498 /*
499  * Get the nctxs from MD. If absent panic.
500  */
501 static uint64_t
502 get_mmu_ctx_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
503 {
504 	uint64_t ctx_bits;
505 
506 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#context-bits",
507 	    &ctx_bits))
508 		ctx_bits = 0;
509 
510 	if (ctx_bits < MIN_NCTXS_BITS || ctx_bits > MAX_NCTXS_BITS)
511 		cmn_err(CE_PANIC, "Incorrect %ld number of contexts bits "
512 		    "returned by MD", ctx_bits);
513 
514 	return (ctx_bits);
515 }
516 
517 /*
518  * Get the number of tsbs from MD. If absent the default value is 0.
519  */
520 static uint64_t
521 get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie)
522 {
523 	uint64_t number_tsbs;
524 
525 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-#tsbs",
526 	    &number_tsbs))
527 		number_tsbs = 0;
528 
529 	return (number_tsbs);
530 }
531 
532 /*
533  * Get the number of shared contexts from MD. If absent the default value is 0.
534  *
535  */
536 static uint64_t
537 get_mmu_shcontexts(md_t *mdp, mde_cookie_t cpu_node_cookie)
538 {
539 	uint64_t number_contexts;
540 
541 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#shared-contexts",
542 	    &number_contexts))
543 		number_contexts = 0;
544 
545 	return (number_contexts);
546 }
547 
548 /*
549  * Initalize supported page sizes information.
550  * Set to 0, if the page sizes mask information is absent in MD.
551  */
552 static uint64_t
553 get_cpu_pagesizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
554 {
555 	uint64_t mmu_page_size_list;
556 
557 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-page-size-list",
558 	    &mmu_page_size_list))
559 		mmu_page_size_list = 0;
560 
561 	if (mmu_page_size_list == 0 || mmu_page_size_list > MAX_PAGESIZE_MASK)
562 		cmn_err(CE_PANIC, "Incorrect 0x%lx pagesize mask returned"
563 		    "by MD", mmu_page_size_list);
564 
565 	return (mmu_page_size_list);
566 }
567 
568 /*
569  * This routine gets the isalist information from MD and appends
570  * the CPU module ISA set if required.
571  */
572 static char *
573 construct_isalist(md_t *mdp, mde_cookie_t cpu_node_cookie,
574     char **cpu_module_isa_set)
575 {
576 	extern int at_flags;
577 	char *md_isalist;
578 	int md_isalen;
579 	char *isabuf;
580 	int isalen;
581 	char **isa_set;
582 	char *p, *q;
583 	int cpu_module_isalen = 0, found = 0;
584 
585 	(void) md_get_prop_data(mdp, cpu_node_cookie,
586 	    "isalist", (uint8_t **)&isabuf, &isalen);
587 
588 	/*
589 	 * We support binaries for all the cpus that have shipped so far.
590 	 * The kernel emulates instructions that are not supported by hardware.
591 	 */
592 	at_flags = EF_SPARC_SUN_US3 | EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
593 
594 	/*
595 	 * Construct the space separated isa_list.
596 	 */
597 	if (cpu_module_isa_set != NULL) {
598 		for (isa_set = cpu_module_isa_set; *isa_set != NULL;
599 		    isa_set++) {
600 			cpu_module_isalen += strlen(*isa_set);
601 			cpu_module_isalen++;	/* for space character */
602 		}
603 	}
604 
605 	/*
606 	 * Allocate the buffer of MD isa buffer length + CPU module
607 	 * isa buffer length.
608 	 */
609 	md_isalen = isalen + cpu_module_isalen + 2;
610 	md_isalist = (char *)prom_alloc((caddr_t)0, md_isalen, 0);
611 	if (md_isalist == NULL)
612 		cmn_err(CE_PANIC, "construct_isalist: Allocation failed for "
613 		    "md_isalist");
614 
615 	md_isalist[0] = '\0'; /* create an empty string to start */
616 	for (p = isabuf, q = p + isalen; p < q; p += strlen(p) + 1) {
617 		(void) strlcat(md_isalist, p, md_isalen);
618 		(void) strcat(md_isalist, " ");
619 	}
620 
621 	/*
622 	 * Check if the isa_set is present in isalist returned by MD.
623 	 * If yes, then no need to append it, if no then append it to
624 	 * isalist returned by MD.
625 	 */
626 	if (cpu_module_isa_set != NULL) {
627 		for (isa_set = cpu_module_isa_set; *isa_set != NULL;
628 		    isa_set++) {
629 			found = 0;
630 			for (p = isabuf, q = p + isalen; p < q;
631 			    p += strlen(p) + 1) {
632 				if (strcmp(p, *isa_set) == 0) {
633 					found = 1;
634 					break;
635 				}
636 			}
637 			if (!found) {
638 				(void) strlcat(md_isalist, *isa_set, md_isalen);
639 				(void) strcat(md_isalist, " ");
640 			}
641 		}
642 	}
643 
644 	/* Get rid of any trailing white spaces */
645 	md_isalist[strlen(md_isalist) - 1] = '\0';
646 
647 	return (md_isalist);
648 }
649 
650 static void
651 get_hwcaps(md_t *mdp, mde_cookie_t cpu_node_cookie)
652 {
653 	char *hwcapbuf;
654 	int hwcaplen;
655 
656 	if (md_get_prop_data(mdp, cpu_node_cookie,
657 	    "hwcap-list", (uint8_t **)&hwcapbuf, &hwcaplen)) {
658 		/* Property not found */
659 		return;
660 	}
661 
662 	cpu_hwcap_flags |= names2bits(hwcapbuf, hwcaplen, FMT_AV_SPARC,
663 	    "unrecognized token: %s");
664 }
665 
666 static void
667 get_weakest_mem_model(md_t *mdp, mde_cookie_t cpu_node_cookie)
668 {
669 	char *mmbuf;
670 	int mmlen;
671 	uint_t wmm;
672 	char *p, *q;
673 
674 	if (md_get_prop_data(mdp, cpu_node_cookie,
675 	    "memory-model-list", (uint8_t **)&mmbuf, &mmlen)) {
676 		/* Property not found */
677 		return;
678 	}
679 
680 	wmm = TSTATE_MM_TSO;
681 	for (p = mmbuf, q = p + mmlen; p < q; p += strlen(p) + 1) {
682 		if (strcmp(p, "wc") == 0)
683 			wmm = TSTATE_MM_WC;
684 	}
685 	weakest_mem_model = wmm;
686 }
687 
688 /*
689  * Does the opposite of cmn_err(9f) "%b" conversion specification:
690  * Given a list of strings, converts them to a bit-vector.
691  *
692  *  tokens - is a buffer of [NUL-terminated] strings.
693  *  tokenslen - length of tokenbuf in bytes.
694  *  bit_formatter - is a %b format string, such as FMT_AV_SPARC
695  *    from /usr/include/sys/auxv_SPARC.h, of the form:
696  *    <base-char>[<bit-char><token-string>]...
697  *        <base-char> is ignored.
698  *        <bit-char>  is [1-32], as per cmn_err(9f).
699  *  warning - is a printf-style format string containing "%s",
700  *    which is used to print a warning message when an unrecognized
701  *    token is found.  If warning is NULL, no warning is printed.
702  * Returns a bit-vector corresponding to the specified tokens.
703  */
704 
705 static unsigned long
706 names2bits(char *tokens, size_t tokenslen, char *bit_formatter, char *warning)
707 {
708 	char *cur;
709 	size_t  curlen;
710 	unsigned long ul = 0;
711 	char *hit;
712 	char *bs;
713 
714 	bit_formatter++;	/* skip base; not needed for input */
715 	cur = tokens;
716 	while (tokenslen) {
717 		curlen = strlen(cur);
718 		bs = bit_formatter;
719 		/*
720 		 * We need a complicated while loop and the >=32 check,
721 		 * instead of a simple "if (strstr())" so that when the
722 		 * token is "vis", we don't match on "vis2" (for example).
723 		 */
724 		/* LINTED E_EQUALITY_NOT_ASSIGNMENT */
725 		while ((hit = strstr(bs, cur)) &&
726 		    *(hit + curlen) >= 32) {
727 			/*
728 			 * We're still in the middle of a word, i.e., not
729 			 * pointing at a <bit-char>.  So advance ptr
730 			 * to ensure forward progress.
731 			 */
732 			bs = hit + curlen + 1;
733 		}
734 
735 		if (hit != NULL) {
736 			ul |= (1<<(*(hit-1) - 1));
737 		} else {
738 			/* The token wasn't found in bit_formatter */
739 			if (warning != NULL)
740 				cmn_err(CE_WARN, warning, cur);
741 		}
742 		tokenslen -= curlen + 1;
743 		cur += curlen + 1;
744 	}
745 	return (ul);
746 }
747 
748 uint64_t
749 get_ra_limit(md_t *mdp, mde_cookie_t cpu_node_cookie)
750 {
751 	extern int ppvm_enable;
752 	extern int meta_alloc_enable;
753 	mde_cookie_t *mem_list;
754 	mde_cookie_t *mblock_list;
755 	int i;
756 	int memnodes;
757 	int nmblock;
758 	uint64_t r;
759 	uint64_t base;
760 	uint64_t size;
761 	uint64_t ra_limit = 0, new_limit = 0;
762 
763 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#ra-bits", &r) == 0) {
764 		if (r == 0 || r > RA_ADDRESS_SPACE_BITS)
765 			cmn_err(CE_PANIC, "Incorrect number of ra bits in MD");
766 		else {
767 			/*
768 			 * Enable memory DR and metadata (page_t)
769 			 * allocation from existing memory.
770 			 */
771 			ppvm_enable = 1;
772 			meta_alloc_enable = 1;
773 			return (1ULL << r);
774 		}
775 	}
776 
777 	memnodes = md_alloc_scan_dag(mdp,
778 	    md_root_node(mdp), "memory", "fwd", &mem_list);
779 
780 	ASSERT(memnodes == 1);
781 
782 	nmblock = md_alloc_scan_dag(mdp,
783 	    mem_list[0], "mblock", "fwd", &mblock_list);
784 	if (nmblock < 1)
785 		cmn_err(CE_PANIC, "cannot find mblock nodes in MD");
786 
787 	for (i = 0; i < nmblock; i++) {
788 		if (md_get_prop_val(mdp, mblock_list[i], "base", &base))
789 			cmn_err(CE_PANIC, "base property missing from MD"
790 			    " mblock node");
791 		if (md_get_prop_val(mdp, mblock_list[i], "size", &size))
792 			cmn_err(CE_PANIC, "size property missing from MD"
793 			    " mblock node");
794 
795 		ASSERT(size != 0);
796 
797 		new_limit = base + size;
798 
799 		if (base > new_limit)
800 			cmn_err(CE_PANIC, "mblock in MD wrapped around");
801 
802 		if (new_limit > ra_limit)
803 			ra_limit = new_limit;
804 	}
805 
806 	ASSERT(ra_limit != 0);
807 
808 	if (ra_limit > MAX_REAL_ADDRESS) {
809 		cmn_err(CE_WARN, "Highest real address in MD too large"
810 		    " clipping to %llx\n", MAX_REAL_ADDRESS);
811 		ra_limit = MAX_REAL_ADDRESS;
812 	}
813 
814 	md_free_scan_dag(mdp, &mblock_list);
815 
816 	md_free_scan_dag(mdp, &mem_list);
817 
818 	return (ra_limit);
819 }
820 
821 /*
822  * This routine sets the globals for CPU and DEV mondo queue entries and
823  * resumable and non-resumable error queue entries.
824  *
825  * First, look up the number of bits available to pass an entry number.
826  * This can vary by platform and may result in allocating an unreasonably
827  * (or impossibly) large amount of memory for the corresponding table,
828  * so we clamp it by 'max_entries'.  Finally, since the q size is used when
829  * calling contig_mem_alloc(), which expects a power of 2, clamp the q size
830  * down to a power of 2.  If the prop is missing, use 'default_entries'.
831  */
832 static uint64_t
833 get_single_q_size(md_t *mdp, mde_cookie_t cpu_node_cookie,
834     char *qnamep, uint64_t default_entries, uint64_t max_entries)
835 {
836 	uint64_t entries;
837 
838 	if (default_entries > max_entries)
839 		cmn_err(CE_CONT, "!get_single_q_size: dflt %ld > "
840 		    "max %ld for %s\n", default_entries, max_entries, qnamep);
841 
842 	if (md_get_prop_val(mdp, cpu_node_cookie, qnamep, &entries)) {
843 		if (!broken_md_flag)
844 			cmn_err(CE_PANIC, "Missing %s property in MD cpu node",
845 			    qnamep);
846 		entries = default_entries;
847 	} else {
848 		entries = 1 << entries;
849 	}
850 
851 	entries = MIN(entries, max_entries);
852 	/* If not a power of 2, truncate to a power of 2. */
853 	if ((entries & (entries - 1)) != 0) {
854 		entries = 1 << (highbit(entries) - 1);
855 	}
856 
857 	return (entries);
858 }
859 
860 /* Scaling constant used to compute size of cpu mondo queue */
861 #define	CPU_MONDO_Q_MULTIPLIER	8
862 
863 static void
864 get_q_sizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
865 {
866 	uint64_t max_qsize;
867 	mde_cookie_t *platlist;
868 	int nrnode;
869 
870 	/*
871 	 * Compute the maximum number of entries for the cpu mondo queue.
872 	 * Use the appropriate property in the platform node, if it is
873 	 * available.  Else, base it on NCPU.
874 	 */
875 	nrnode = md_alloc_scan_dag(mdp,
876 	    md_root_node(mdp), "platform", "fwd", &platlist);
877 
878 	ASSERT(nrnode == 1);
879 
880 	ncpu_guest_max = NCPU;
881 	(void) md_get_prop_val(mdp, platlist[0], "max-cpus", &ncpu_guest_max);
882 	max_qsize = ncpu_guest_max * CPU_MONDO_Q_MULTIPLIER;
883 
884 	md_free_scan_dag(mdp, &platlist);
885 
886 	cpu_q_entries = get_single_q_size(mdp, cpu_node_cookie,
887 	    "q-cpu-mondo-#bits", DEFAULT_CPU_Q_ENTRIES, max_qsize);
888 
889 	dev_q_entries = get_single_q_size(mdp, cpu_node_cookie,
890 	    "q-dev-mondo-#bits", DEFAULT_DEV_Q_ENTRIES, MAXIVNUM);
891 
892 	cpu_rq_entries = get_single_q_size(mdp, cpu_node_cookie,
893 	    "q-resumable-#bits", CPU_RQ_ENTRIES, MAX_CPU_RQ_ENTRIES);
894 
895 	cpu_nrq_entries = get_single_q_size(mdp, cpu_node_cookie,
896 	    "q-nonresumable-#bits", CPU_NRQ_ENTRIES, MAX_CPU_NRQ_ENTRIES);
897 }
898 
899 
900 static void
901 get_va_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
902 {
903 	uint64_t value = VA_ADDRESS_SPACE_BITS;
904 
905 	if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#va-bits", &value))
906 		cmn_err(CE_PANIC, "mmu-#va-bits property  not found in MD");
907 
908 
909 	if (value == 0 || value > VA_ADDRESS_SPACE_BITS)
910 		cmn_err(CE_PANIC, "Incorrect number of va bits in MD");
911 
912 	/* Do not expect number of VA bits to be more than 32-bit quantity */
913 
914 	va_bits = (int)value;
915 
916 	/*
917 	 * Correct the value for VA bits on UltraSPARC-T1 based systems
918 	 * in case of broken MD.
919 	 */
920 	if (broken_md_flag)
921 		va_bits = DEFAULT_VA_ADDRESS_SPACE_BITS;
922 }
923 
/*
 * Return the cached number of level-2 cache nodes (n_l2_caches).
 */
int
l2_cache_node_count(void)
{
	return (n_l2_caches);
}
929 
930 /*
931  * count the number of l2 caches.
932  */
933 int
934 get_l2_cache_node_count(md_t *mdp)
935 {
936 	int i;
937 	mde_cookie_t *cachenodes;
938 	uint64_t level;
939 	int n_cachenodes = md_alloc_scan_dag(mdp, md_root_node(mdp),
940 	    "cache", "fwd", &cachenodes);
941 	int l2_caches = 0;
942 
943 	for (i = 0; i < n_cachenodes; i++) {
944 		if (md_get_prop_val(mdp, cachenodes[i], "level", &level) != 0) {
945 			level = 0;
946 		}
947 		if (level == 2) {
948 			l2_caches++;
949 		}
950 	}
951 	md_free_scan_dag(mdp, &cachenodes);
952 	return (l2_caches);
953 }
954 
955 /*
956  * This routine returns the L2 cache information such as -- associativity,
957  * size and linesize.
958  */
959 static int
960 get_l2_cache_info(md_t *mdp, mde_cookie_t cpu_node_cookie,
961 	    uint64_t *associativity, uint64_t *size, uint64_t *linesize)
962 {
963 	mde_cookie_t *cachelist;
964 	int ncaches, i;
965 	uint64_t cache_level = 0;
966 
967 	ncaches = md_alloc_scan_dag(mdp, cpu_node_cookie, "cache",
968 	    "fwd", &cachelist);
969 	/*
970 	 * The "cache" node is optional in MD, therefore ncaches can be 0.
971 	 */
972 	if (ncaches < 1) {
973 		return (0);
974 	}
975 
976 	for (i = 0; i < ncaches; i++) {
977 		uint64_t local_assoc;
978 		uint64_t local_size;
979 		uint64_t local_lsize;
980 
981 		if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
982 			continue;
983 
984 		if (cache_level != 2) continue;
985 
986 		/* If properties are missing from this cache ignore it */
987 
988 		if ((md_get_prop_val(mdp, cachelist[i],
989 		    "associativity", &local_assoc))) {
990 			continue;
991 		}
992 
993 		if ((md_get_prop_val(mdp, cachelist[i],
994 		    "size", &local_size))) {
995 			continue;
996 		}
997 
998 		if ((md_get_prop_val(mdp, cachelist[i],
999 		    "line-size", &local_lsize))) {
1000 			continue;
1001 		}
1002 
1003 		*associativity = local_assoc;
1004 		*size = local_size;
1005 		*linesize = local_lsize;
1006 		break;
1007 	}
1008 
1009 	md_free_scan_dag(mdp, &cachelist);
1010 
1011 	return ((cache_level == 2) ? 1 : 0);
1012 }
1013 
1014 
1015 /*
1016  * Set the broken_md_flag to 1 if the MD doesn't have
1017  * the domaining-enabled property in the platform node and the
1018  * platform uses the UltraSPARC-T1 cpu. This flag is used to
1019  * workaround some of the incorrect MD properties.
1020  */
1021 static void
1022 init_md_broken(md_t *mdp, mde_cookie_t *cpulist)
1023 {
1024 	int nrnode;
1025 	mde_cookie_t *platlist, rootnode;
1026 	uint64_t val = 0;
1027 	char *namebuf;
1028 	int namelen;
1029 
1030 	rootnode = md_root_node(mdp);
1031 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1032 	ASSERT(cpulist);
1033 
1034 	nrnode = md_alloc_scan_dag(mdp, rootnode, "platform", "fwd",
1035 	    &platlist);
1036 
1037 	if (nrnode < 1)
1038 		cmn_err(CE_PANIC, "init_md_broken: platform node missing");
1039 
1040 	if (md_get_prop_data(mdp, cpulist[0],
1041 	    "compatible", (uint8_t **)&namebuf, &namelen)) {
1042 		cmn_err(CE_PANIC, "init_md_broken: "
1043 		    "Cannot read 'compatible' property of 'cpu' node");
1044 	}
1045 
1046 	if (md_get_prop_val(mdp, platlist[0],
1047 	    "domaining-enabled", &val) == -1 &&
1048 	    strcmp(namebuf, "SUNW,UltraSPARC-T1") == 0)
1049 		broken_md_flag = 1;
1050 
1051 	md_free_scan_dag(mdp, &platlist);
1052 }
1053 
1054 #define	PLAT_MAX_IOALIASES	8
1055 
1056 static plat_alias_t *plat_ioaliases;
1057 static uint64_t plat_num_ioaliases;
1058 
1059 /*
1060  * split the aliases property into its
1061  * component strings for easy searching.
1062  */
1063 static void
1064 split_alias(plat_alias_t *pali, char *str)
1065 {
1066 	char *aliasv[PLAT_MAX_IOALIASES], *p;
1067 	int i, duplen;
1068 	char *dup;
1069 
1070 	/* skip leading space */
1071 	str = dup = strdup(str);
1072 	duplen = strlen(dup) + 1;
1073 	str += strspn(str, " ");
1074 	for (i = 0; *str != '\0'; str = p) {
1075 
1076 		p = strpbrk(str, " ");
1077 		if (p != NULL) {
1078 			*p++ = '\0';
1079 		}
1080 
1081 		VERIFY(i < PLAT_MAX_IOALIASES);
1082 		aliasv[i++] = strdup(str);
1083 		if (p == NULL)
1084 			break;
1085 		p += strspn(p, " ");
1086 	}
1087 
1088 	kmem_free(dup, duplen);
1089 
1090 	if (i == 0) {
1091 		pali->pali_naliases = 0;
1092 		pali->pali_aliases = NULL;
1093 		return;
1094 	}
1095 
1096 	pali->pali_naliases = i;
1097 	pali->pali_aliases = kmem_alloc(i * sizeof (char *), KM_SLEEP);
1098 	for (i = 0; i < pali->pali_naliases; i++) {
1099 		pali->pali_aliases[i] = aliasv[i];
1100 	}
1101 }
1102 
1103 /*
1104  * retrieve the ioalias info from the MD,
1105  * and init the ioalias struct.
1106  *
1107  * NOTE: Assumes that the ioalias info does not change at runtime
1108  * This routine is invoked only once at boot time.
1109  *
1110  * No lock needed as this is called at boot with a DDI lock held
1111  */
1112 void
1113 plat_ioaliases_init(void)
1114 {
1115 	md_t *mdp;
1116 	mde_cookie_t *ionodes, alinode;
1117 	plat_alias_t *pali;
1118 	int nio;
1119 	int i;
1120 	int err;
1121 
1122 	mdp = md_get_handle();
1123 	if (mdp == NULL) {
1124 		cmn_err(CE_PANIC, "no machine description (MD)");
1125 		/*NOTREACHED*/
1126 	}
1127 
1128 	nio = md_alloc_scan_dag(mdp, md_root_node(mdp),
1129 	    "ioaliases", "fwd", &ionodes);
1130 
1131 
1132 	/* not all platforms support aliases */
1133 	if (nio < 1) {
1134 		(void) md_fini_handle(mdp);
1135 		return;
1136 	}
1137 	if (nio > 1) {
1138 		cmn_err(CE_PANIC, "multiple ioalias nodes in MD");
1139 		/*NOTREACHED*/
1140 	}
1141 
1142 	alinode = ionodes[0];
1143 	md_free_scan_dag(mdp, &ionodes);
1144 
1145 	nio = md_alloc_scan_dag(mdp, alinode, "ioalias", "fwd", &ionodes);
1146 	if (nio <= 0) {
1147 		cmn_err(CE_PANIC, "MD alias node has no aliases");
1148 		/*NOTREACHED*/
1149 	}
1150 
1151 	plat_num_ioaliases = nio;
1152 	plat_ioaliases = pali = kmem_zalloc(nio * sizeof (plat_alias_t),
1153 	    KM_SLEEP);
1154 
1155 	/*
1156 	 * Each ioalias map will have a composite property of
1157 	 * aliases and the current valid path.
1158 	 */
1159 	for (i = 0; i < nio; i++) {
1160 		char *str;
1161 
1162 		err = md_get_prop_str(mdp, ionodes[i], "current", &str);
1163 		if (err != 0) {
1164 			cmn_err(CE_PANIC, "malformed ioalias node");
1165 			/*NOTREACHED*/
1166 		}
1167 		pali->pali_current = strdup(str);
1168 
1169 		err = md_get_prop_str(mdp, ionodes[i], "aliases", &str);
1170 		if (err != 0) {
1171 			cmn_err(CE_PANIC, "malformed aliases");
1172 			/*NOTREACHED*/
1173 		}
1174 		DDI_MP_DBG((CE_NOTE, "path: %s aliases %s",
1175 		    pali->pali_current, str));
1176 
1177 		split_alias(pali, str);
1178 		pali++;
1179 	}
1180 
1181 	md_free_scan_dag(mdp, &ionodes);
1182 
1183 	/*
1184 	 * Register the io-aliases array with the DDI framework
1185 	 * The DDI framework assumes that this array and its contents
1186 	 * will not change post-register. The DDI framework will
1187 	 * cache this array and is free to access this array at
1188 	 * any time without any locks.
1189 	 */
1190 	ddi_register_aliases(plat_ioaliases, plat_num_ioaliases);
1191 
1192 	(void) md_fini_handle(mdp);
1193 }
1194 
1195 /*
1196  * Number of bits forming a valid context for use in a sun4v TTE and the MMU
1197  * context registers. Sun4v defines the minimum default value to be 13 if this
1198  * property is not specified in a cpu node in machine descriptor graph.
1199  */
1200 #define	MMU_INFO_CTXBITS_MIN		13
1201 
1202 /* Convert context bits to number of contexts */
1203 #define	MMU_INFO_BNCTXS(nbits)		((uint_t)(1u<<(nbits)))
1204 
1205 /*
1206  * Read machine descriptor and load TLB to CPU mappings.
1207  * Returned values: cpuid2pset[NCPU], nctxs[NCPU], md_gen
1208  * - cpuid2pset is initialized so it can convert cpuids to processor set of CPUs
1209  *   that are shared between TLBs.
1210  * - nctxs is initialized to number of contexts for each CPU
1211  * - md_gen is set to generation number of machine descriptor from which this
1212  *   data was.
1213  * Return: zero on success.
1214  */
1215 static int
1216 load_tlb_cpu_mappings(cpuset_t **cpuid2pset, uint_t *nctxs, uint64_t *md_gen)
1217 {
1218 	mde_str_cookie_t cpu_sc, bck_sc;
1219 	int		tlbs_idx, cp_idx;
1220 	mde_cookie_t	root;
1221 	md_t		*mdp = NULL;
1222 	mde_cookie_t	*tlbs = NULL;
1223 	mde_cookie_t	*cp = NULL;
1224 	uint64_t	*cpids = NULL;
1225 	uint64_t	nbit;
1226 	int		ntlbs;
1227 	int		ncp;
1228 	int		retval = 1;
1229 	cpuset_t	*ppset;
1230 
1231 	/* get MD handle, and string cookies for cpu and back nodes */
1232 	if ((mdp = md_get_handle()) == NULL ||
1233 	    (cpu_sc = md_find_name(mdp, "cpu")) == MDE_INVAL_STR_COOKIE ||
1234 	    (bck_sc = md_find_name(mdp, "back")) == MDE_INVAL_STR_COOKIE)
1235 		goto cleanup;
1236 
1237 	/* set generation number of current MD handle */
1238 	*md_gen = md_get_gen(mdp);
1239 
1240 	/* Find root element, and search for all TLBs in MD */
1241 	if ((root = md_root_node(mdp)) == MDE_INVAL_ELEM_COOKIE ||
1242 	    (ntlbs = md_alloc_scan_dag(mdp, root, "tlb", "fwd", &tlbs)) <= 0)
1243 		goto cleanup;
1244 
1245 	cp = kmem_alloc(sizeof (mde_cookie_t) * NCPU, KM_SLEEP);
1246 	cpids = kmem_alloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
1247 
1248 	/*
1249 	 * Build processor sets, one per possible context domain.  For each tlb,
1250 	 * search for connected CPUs.  If any CPU is already in a set, then add
1251 	 * all the TLB's CPUs to that set.  Otherwise, create and populate a new
1252 	 * pset.  Thus, a single pset is built to represent multiple TLBs if
1253 	 * they have CPUs in common.
1254 	 */
1255 	for (tlbs_idx = 0; tlbs_idx < ntlbs; tlbs_idx++) {
1256 		ncp = md_scan_dag(mdp, tlbs[tlbs_idx], cpu_sc, bck_sc, cp);
1257 		if (ncp < 0)
1258 			goto cleanup;
1259 		else if (ncp == 0)
1260 			continue;
1261 
1262 		/* Get the id and number of contexts for each cpu */
1263 		for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
1264 			mde_cookie_t c = cp[cp_idx];
1265 
1266 			if (md_get_prop_val(mdp, c, "id", &cpids[cp_idx]))
1267 				goto cleanup;
1268 			if (md_get_prop_val(mdp, c, "mmu-#context-bits", &nbit))
1269 				nbit = MMU_INFO_CTXBITS_MIN;
1270 			nctxs[cpids[cp_idx]] = MMU_INFO_BNCTXS(nbit);
1271 		}
1272 
1273 		/*
1274 		 * If a CPU is already in a set as shown by cpuid2pset[], then
1275 		 * use that set.
1276 		 */
1277 		for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
1278 			ASSERT(cpids[cp_idx] < NCPU);
1279 			ppset = cpuid2pset[cpids[cp_idx]];
1280 			if (ppset != NULL)
1281 				break;
1282 		}
1283 
1284 		/* No CPU has a set. Create a new one. */
1285 		if (ppset == NULL) {
1286 			ppset = kmem_alloc(sizeof (cpuset_t), KM_SLEEP);
1287 			CPUSET_ZERO(*ppset);
1288 		}
1289 
1290 		/* Add every CPU to the set, and record the set assignment. */
1291 		for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
1292 			cpuid2pset[cpids[cp_idx]] = ppset;
1293 			CPUSET_ADD(*ppset, cpids[cp_idx]);
1294 		}
1295 	}
1296 
1297 	retval = 0;
1298 
1299 cleanup:
1300 	if (tlbs != NULL)
1301 		md_free_scan_dag(mdp, &tlbs);
1302 	if (cp != NULL)
1303 		kmem_free(cp, sizeof (mde_cookie_t) * NCPU);
1304 	if (cpids != NULL)
1305 		kmem_free(cpids, sizeof (uint64_t) * NCPU);
1306 	if (mdp != NULL)
1307 		(void) md_fini_handle(mdp);
1308 
1309 	return (retval);
1310 }
1311 
1312 /*
1313  * Return MMU info based on cpuid.
1314  *
1315  * Algorithm:
1316  * Read machine descriptor and find all CPUs that share the same TLB with CPU
1317  * specified by cpuid. Go through found CPUs and see if any one of them already
1318  * has MMU index, if so, set index based on that value. If CPU does not share
1319  * TLB with any other CPU or if none of those CPUs has mmu_ctx pointer, find the
1320  * smallest available MMU index and give it to current CPU. If no available
1321  * domain, perform a round robin, and start assigning from the beginning.
1322  *
1323  * For optimization reasons, this function uses a cache to store all TLB to CPU
1324  * mappings, and updates them only when machine descriptor graph is changed.
1325  * Because of this, and because we search MMU table for smallest index id, this
1326  * function needs to be serialized which is protected by cpu_lock.
1327  */
1328 void
1329 plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
1330 {
1331 	static cpuset_t	**cpuid2pset = NULL;
1332 	static uint_t	*nctxs;
1333 	static uint_t	next_domain = 0;
1334 	static uint64_t	md_gen = MDESC_INVAL_GEN;
1335 	uint64_t	current_gen;
1336 	int		idx;
1337 	cpuset_t	cpuid_pset;
1338 	processorid_t	id;
1339 	cpu_t		*cp;
1340 
1341 	ASSERT(MUTEX_HELD(&cpu_lock));
1342 
1343 	current_gen = md_get_current_gen();
1344 
1345 	/*
1346 	 * Load TLB CPU mappings only if MD generation has changed, FW that do
1347 	 * not provide generation number, always return MDESC_INVAL_GEN, and as
1348 	 * result MD is read here only once on such machines: when cpuid2pset is
1349 	 * NULL
1350 	 */
1351 	if (current_gen != md_gen || cpuid2pset == NULL) {
1352 		if (cpuid2pset == NULL) {
1353 			cpuid2pset = kmem_zalloc(sizeof (cpuset_t *) * NCPU,
1354 			    KM_SLEEP);
1355 			nctxs = kmem_alloc(sizeof (uint_t) * NCPU, KM_SLEEP);
1356 		} else {
1357 			/* clean cpuid2pset[NCPU], before loading new values */
1358 			for (idx = 0; idx < NCPU; idx++) {
1359 				cpuset_t *pset = cpuid2pset[idx];
1360 
1361 				if (pset != NULL) {
1362 					for (;;) {
1363 						CPUSET_FIND(*pset, id);
1364 						if (id == CPUSET_NOTINSET)
1365 							break;
1366 						CPUSET_DEL(*pset, id);
1367 						ASSERT(id < NCPU);
1368 						cpuid2pset[id] = NULL;
1369 					}
1370 					ASSERT(cpuid2pset[idx] == NULL);
1371 					kmem_free(pset, sizeof (cpuset_t));
1372 				}
1373 			}
1374 		}
1375 
1376 		if (load_tlb_cpu_mappings(cpuid2pset, nctxs, &md_gen))
1377 			goto error_panic;
1378 	}
1379 
1380 	info->mmu_nctxs = nctxs[cpuid];
1381 
1382 	if (cpuid2pset[cpuid] == NULL)
1383 		goto error_panic;
1384 
1385 	cpuid_pset = *cpuid2pset[cpuid];
1386 	CPUSET_DEL(cpuid_pset, cpuid);
1387 
1388 	/* Search for a processor in the same TLB pset with MMU context */
1389 	for (;;) {
1390 		CPUSET_FIND(cpuid_pset, id);
1391 
1392 		if (id == CPUSET_NOTINSET)
1393 			break;
1394 
1395 		ASSERT(id < NCPU);
1396 		cp = cpu[id];
1397 		if (cp != NULL && CPU_MMU_CTXP(cp) != NULL) {
1398 			info->mmu_idx = CPU_MMU_IDX(cp);
1399 
1400 			return;
1401 		}
1402 		CPUSET_DEL(cpuid_pset, id);
1403 	}
1404 
1405 	/*
1406 	 * No CPU in the TLB pset has a context domain yet.
1407 	 * Use next_domain if available, or search for an unused domain, or
1408 	 * overload next_domain, in that order.  Overloading is necessary when
1409 	 * the number of TLB psets is greater than max_mmu_ctxdoms.
1410 	 */
1411 	idx = next_domain;
1412 
1413 	if (mmu_ctxs_tbl[idx] != NULL) {
1414 		for (idx = 0; idx < max_mmu_ctxdoms; idx++)
1415 			if (mmu_ctxs_tbl[idx] == NULL)
1416 				break;
1417 		if (idx == max_mmu_ctxdoms) {
1418 			/* overload next_domain */
1419 			idx = next_domain;
1420 
1421 			if (info->mmu_nctxs < sfmmu_ctxdom_nctxs(idx))
1422 				cmn_err(CE_PANIC, "max_mmu_ctxdoms is too small"
1423 				    " to support CPUs with different nctxs");
1424 		}
1425 	}
1426 
1427 	info->mmu_idx = idx;
1428 	next_domain = (idx + 1) % max_mmu_ctxdoms;
1429 
1430 	return;
1431 
1432 error_panic:
1433 	cmn_err(CE_PANIC, "!cpu%d: failed to get MMU CTX domain index", cpuid);
1434 }
1435